Skip to content

Commit de7767f

Browse files
committed
优化注释和 README
1 parent 809d4ad commit de7767f

File tree

3 files changed

+46
-34
lines changed

3 files changed

+46
-34
lines changed

README.md

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
1-
# Python 验证码识别
1+
# 图形验证码识别系统
2+
- 由 Python 编写
3+
- 超级简单
4+
- 不怎么靠谱
5+
- 不建议用于生产环境
26

3-
> 由 Python 编写的超级简单的不怎么靠谱的验证码识别系统
7+
## 原理
8+
- 使用 `PIL` 处理验证码图片
9+
- 使用 OCR 方式识别验证码
10+
- `pytesseract`
411

5-
- 使用 PIL 处理验证码图片
6-
- 使用 OCR 方式识别验证码
12+
13+
## 文件
14+
- `program.py`:程序文件
15+
- `class GetCode`
16+
- `dict headers`
17+
- 75行在生产环境下请删除或注释掉
18+
- `test.py`:测试文件
19+
- 针对 [http://chaxun.heyuanedu.cn:88/](http://chaxun.heyuanedu.cn:88/ "http://chaxun.heyuanedu.cn:88/") 系统的测试
20+
- 靠运气和大量重试来通过验证码
21+
- 图片储存至 image/

program.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,50 +10,50 @@
1010

1111
class GetCode:
1212
def __init__(self, path, url=None, headers=headers, origin='chaxun.heyuanedu.cn:88', threshold=185): # 初始化对象实例
13-
self.path = path
13+
self.path = path # 赋值变量
1414
self.url = url
1515
self.headers = headers
1616
self.origin = origin
1717
self.threshold = threshold
1818

1919
def getReq(self, url=None, headers=None, origin='chaxun.heyuanedu.cn:88'): # 获取 Request 对象
20-
self.url = url if url != None else self.url
20+
self.url = url if url != None else self.url # 赋值变量
2121
self.headers = headers if headers != None else self.headers
2222
self.origin = origin if origin != None else self.origin
23-
return Request(self.url, None, self.headers, self.origin)
23+
return Request(self.url, None, self.headers, self.origin) # 返回 Request 对象
2424

2525
def setting(self, path=None, url=None, headers=None, origin=None, threshold=None): # 设置实例的各类变量
26-
self.path = path if path != None else self.path
26+
self.path = path if path != None else self.path # 赋值变量
2727
self.url = url if url != None else self.url
2828
self.headers = headers if headers != None else self.headers
2929
self.origin = origin if origin != None else self.origin
3030
self.threshold = threshold if threshold != None else self.threshold
3131

3232
def getSession(self, url=None, headers=None, origin=None): # 获取对应系统的 Session ,注意:请勿重复调用!否则将可能导致不可预知的问题!
33-
self.url = url if url != None else self.url
33+
self.url = url if url != None else self.url # 赋值变量
3434
self.headers = headers if headers != None else self.headers
3535
self.origin = origin if origin != None else self.origin
36-
self.headers['Cookie'] = urlopen(self.getReq()).info()['Set-Cookie'].split(';')[0]
37-
return self.headers
36+
self.headers['Cookie'] = urlopen(self.getReq()).info()['Set-Cookie'].split(';')[0] # 写入 Cookie(无法处理多个 Cookies 的情况!如果重复调用也可能导致出现问题)(其实就是因为我懒)
37+
return self.headers # 返回 headers
3838

3939
def download(self, url=None, path=None, headers=None, origin=None): # 下载验证码图片
40-
self.url = url if url != None else self.url
40+
self.url = url if url != None else self.url # 赋值变量
4141
self.path = path if path != None else self.path
4242
self.headers = headers if headers != None else self.headers
4343
self.origin = origin if origin != None else self.origin
4444
if not os.path.exists(self.path[:self.path.rfind(os.sep)]): # 防止目录不存在出错
45-
os.mkdir(self.path[:self.path.rfind(os.sep)])
46-
with open(self.path, 'wb') as f:
47-
size = f.write(urlopen(self.getReq()).read())
48-
return size
45+
os.mkdir(self.path[:self.path.rfind(os.sep)]) # 创建目录
46+
with open(self.path, 'wb') as f: # 开文件
47+
size = f.write(urlopen(self.getReq()).read()) # 写文件(图片)
48+
return size # 返回文件大小
4949

5050
def identify(self, path=None, threshold=None): # 识别验证码
51-
self.path = path if path != None else self.path
51+
self.path = path if path != None else self.path # 赋值变量
5252
self.threshold = threshold if threshold != None else self.threshold
53-
image = Image.open(self.path)
54-
pixdata = image.load()
55-
w, h = image.size
56-
for y in range(h): # 处理多余色彩
53+
image = Image.open(self.path) # PIL 打开图片
54+
pixdata = image.load() # 载入图片到变量
55+
w, h = image.size # 获取宽高
56+
for y in range(h): # 处理色彩
5757
for x in range(w):
5858
if pixdata[x, y][0] < self.threshold and pixdata[x, y][1] < self.threshold and pixdata[x, y][2] < self.threshold:
5959
pixdata[x, y] = (0, 0, 0)

test.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,32 +7,29 @@
77
import re, os
88

99
ele0 = ''
10-
p = re.compile(r'(\d)[^0-9]*(\d)[^0-9]*(\d)[^0-9]*(\d)') # 此系统验证码正则
10+
p = re.compile(r'(\d)[^0-9]*(\d)[^0-9]*(\d)[^0-9]*(\d)') # 验证码正则
1111

1212
getCode = GetCode('image%scode.png'%os.sep, 'http://chaxun.heyuanedu.cn:88/validatecode.php?act=getimg') # 实例化对象
1313
getCode.getSession() # 获取 Session
1414

1515
userName = input('姓名:')
1616
identity = input('准考证号:')
1717

18-
while ele0 == '': # 识别通过正则但错误循环
18+
while ele0 == '': # 通过正则但错误
1919
result2 = None
20-
while result2 == None: # 识别无法通过正则循环
21-
#time.sleep(0.2)
20+
while result2 == None: # 无法通过正则 #time.sleep(0.2)
2221
print('Downloading...')
23-
getCode.download()
22+
getCode.download() # 下载验证码
2423
print('Identifying...')
25-
result = getCode.identify()
26-
result2 = ('%s%s%s%s'%p.search(result).groups() if p.search(result) != None else None)
27-
result0 = (result2 if result2 != None else result) if result != '' else 'failure!'
24+
result = getCode.identify() # 识别
25+
result2 = ('%s%s%s%s'%p.search(result).groups() if p.search(result) != None else None) # 正则判断
26+
result0 = (result2 if result2 != None else result) if result != '' else 'failure!' # 识别结果
2827
print('Result: ' + result0)
2928

3029
req = Request('http://chaxun.heyuanedu.cn:88/search.php', urlencode({ 'userName': userName, 'identity': identity, 'code': result0 }).encode(), headers, 'chaxun.heyuanedu.cn:88')
3130
res = urlopen(req) # 获取结果
32-
html = res.read().decode()
33-
#print(res.headers)
34-
#print(html)
31+
html = res.read().decode() # 获取结果 #print(res.headers) #print(html)
3532
soup = BeautifulSoup(html,'lxml') # 解析 html
36-
ele = soup.select('#printGrade')
37-
ele0 = str(ele)[1:-1]
33+
ele = soup.select('#printGrade') # 获取相应元素
34+
ele0 = str(ele)[1:-1] # 获取结果(str)
3835
print(ele0 if ele0 != '' else 'Error!')

0 commit comments

Comments
 (0)