LIBSVM软件包是台湾大学林智仁(Chih-Jen Lin)博士等用C++实现的SVM库,可以说是使用最方便的SVM训练工具[71]。它可以解决分类问题(包括C-SVC、ν-SVC)、回归问题(包括ε-SVR、ν-SVR)以及分布估计(one-class SVM)等问题,提供了线性、多项式、径向基和S形函数四种常用的核函数供选择,可以有效地解决多类问题、交叉验证选择参数、对不平衡样本加权、多类问题的概率估计等。
# coding:utf-8
import urllib.request
import cairosvg, cv2
from svmutil import *
from PIL import Image
# NOTE: this request runs at import time, so every command-line mode
# (not only "test") downloads a captcha before doing anything else.
captchaUrl = "https://xxx.com" + "/server?model=captcha&action=getCaptcha"
req = urllib.request.Request(captchaUrl)
# Use a context manager so the HTTP response is closed once the body is read.
with urllib.request.urlopen(req) as res:
    svg = res.read()  # response body as bytes; later rendered as SVG
def svgToPng(svgSource, outputName):
    """Render SVG source to a PNG file.

    Args:
        svgSource: SVG document passed to cairosvg as ``bytestring``.
        outputName: file name appended to the ``temp\\`` prefix
            (backslash-separated relative path).
    """
    target_path = "temp\\%s" % outputName
    cairosvg.svg2png(bytestring=svgSource, write_to=target_path)
def _get_dynamic_binary_image(tag):
    """Binarize the rendered captcha with an adaptive threshold.

    Reads ``"temp\\%s.png" % tag``, converts it to grayscale, thresholds it
    and writes the result to ``"temp\\%s-binary.png" % tag``.

    Args:
        tag: file-name stem of the captcha image.

    Returns:
        The thresholded image as returned by ``cv2.adaptiveThreshold``.

    Raises:
        OSError: if the source image cannot be read.
    """
    source_path = "temp\\%s.png" % tag
    im = cv2.imread(source_path)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising,
        # which would otherwise surface as an opaque error in cvtColor.
        raise OSError("cannot read image: %s" % source_path)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)  # grayscale
    # Binarize: Gaussian-weighted adaptive threshold, block size 21, C = 1.
    th1 = cv2.adaptiveThreshold(
        im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1
    )
    cv2.imwrite("temp\\%s-binary.png" % tag, th1)
    return th1
def corpCaptcha(tag):
    """Cut the binarized captcha into four full-height column slices.

    Reads ``"temp\\%s-binary.png" % tag`` and writes the slices to
    ``"temp\\%s_p%d.png" % (tag, n)`` for n = 1..4.

    Args:
        tag: file-name stem of the captcha image.

    Raises:
        OSError: if the binarized image cannot be read.
    """
    source_path = "temp\\%s-binary.png" % tag
    image = cv2.imread(source_path)
    if image is None:
        # cv2.imread returns None on failure; fail with a clear message
        # instead of "'NoneType' object is not subscriptable".
        raise OSError("cannot read image: %s" % source_path)
    # (start, end) pixel columns of each slice; neighbouring slices overlap.
    # Presumably one slice per captcha character -- confirm against the
    # captcha layout.
    column_ranges = ((10, 40), (30, 60), (55, 85), (80, 110))
    for index, (left, right) in enumerate(column_ranges, 1):
        cv2.imwrite("temp\\%s_p%d.png" % (tag, index), image[:, left:right])
def computeTestData(tag):
    """Write the feature lines of the four cropped images to a data file.

    Reads ``"temp\\%s_p%d.png" % (tag, n)`` for n = 1..4 and writes one
    ``get_feature`` line per image to ``"temp\\%s_data.txt" % tag``,
    newline-separated with no trailing newline.

    Args:
        tag: file-name stem of the captcha image.
    """
    lines = []
    for index in range(1, 5):
        # Context manager closes each image file once its features are read.
        with Image.open("temp\\%s_p%d.png" % (tag, index)) as img:
            # Every sample is written with label 0 (presumably a placeholder,
            # since the true character is not known at this point).
            lines.append(get_feature(img, 0))
    with open("temp\\%s_data.txt" % tag, "w") as f:
        f.write("\n".join(lines))
def get_feature(img, label):
    """Build one LIBSVM-format line from row/column black-pixel counts.

    The features are the number of black pixels in each row (top to bottom)
    followed by the number in each column (left to right), indexed from 1.

    Args:
        img: object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))``, e.g. a PIL image.
        label: integer class label written at the start of the line.

    Returns:
        A string of the form ``"<label> 1:<count> 2:<count> ..."``.
    """
    width, height = img.size
    row_counts = [0] * height
    col_counts = [0] * width
    # One pass over the pixels fills both histograms (the image used to be
    # scanned twice, once per axis).
    for y in range(height):
        for x in range(width):
            pixel = img.getpixel((x, y))
            # Three-band images yield an (r, g, b) tuple; single-band images
            # yield a plain number, where 0 is black. Checking only the tuple
            # form would silently produce all-zero features for the latter.
            if pixel == (0, 0, 0) or pixel == 0:
                row_counts[y] += 1
                col_counts[x] += 1
    counts = row_counts + col_counts
    features = " ".join("%d:%s" % (i, c) for i, c in enumerate(counts, 1))
    return "%d " % label + features
def recognCaptcha(svg, tag):
    """Run the full recognition pipeline on one captcha.

    Args:
        svg: SVG source of the captcha, handed to ``svgToPng``.
        tag: file-name stem used for every intermediate file.

    Returns:
        The first value returned by ``svm_predict`` (the predicted labels).
    """
    # Preprocessing: each step reads the file(s) written by the previous one.
    svgToPng(svg, "%s.png" % tag)
    _get_dynamic_binary_image(tag)
    corpCaptcha(tag)
    computeTestData(tag)

    # Classification with the saved model.
    model = svm_load_model('captcha.model')
    labels, samples = svm_read_problem("temp\\%s_data.txt" % tag)
    predicted, _accuracy, _values = svm_predict(labels, samples, model)
    return predicted
def asciiDecode(codeList):
    """Turn a sequence of numeric character codes into a string.

    Args:
        codeList: iterable of values accepted by ``int()`` (e.g. the float
            labels produced by ``svm_predict``), each a character code.

    Returns:
        The characters ``chr(int(code))`` joined in order; ``""`` for an
        empty input.
    """
    return "".join(chr(int(code)) for code in codeList)
def _needs_separator(path):
    """Return True if *path* is non-empty and does not end with a newline."""
    try:
        with open(path, "rb") as existing:
            existing.seek(0, 2)  # whence=2: relative to end of file
            if existing.tell() == 0:
                return False
            existing.seek(-1, 2)
            return existing.read(1) != b"\n"
    except FileNotFoundError:
        return False


def learn(tag, rightCode, local):
    """Append labelled samples to the training set and retrain the model.

    Args:
        tag: file-name stem of the cropped images
            ``"temp\\%s_p%d.png" % (tag, n)``.
        rightCode: correct characters, aligned position-by-position with
            ``local`` (e.g. ``["a", "G"]``); an entry equal to ``"o"``
            skips that position.
        local: crop numbers to learn from (e.g. ``[1, 2]``).

    Side effects:
        Appends to ``train_data.txt``, retrains with ``"-t 0 -c 4 -b 1"``
        and overwrites ``captcha.model``.
    """
    train_path = "train_data.txt"
    # Only emit a separating newline when there is something to separate
    # from: an unconditional leading "\n" puts a blank first line into a new
    # or empty file, which is not a valid LIBSVM record.
    need_newline = _needs_separator(train_path)
    with open(train_path, "a") as f:
        for j, i in enumerate(local):
            if rightCode[j] == "o":
                continue
            # Context manager closes the image file after feature extraction.
            with Image.open("temp\\%s_p%d.png" % (tag, i)) as img:
                line = get_feature(img, ord(rightCode[j]))
            if need_newline:
                f.write("\n")
            f.write(line)
            need_newline = True
    y, x = svm_read_problem(train_path)
    m = svm_train(y, x, "-t 0 -c 4 -b 1")
    svm_save_model('captcha.model', m)  # save the model
if __name__ == '__main__':
    # `sys` is not imported at the top of the file; import it here instead of
    # relying on `from svmutil import *` happening to re-export it.
    import sys

    args = sys.argv
    usage = (
        "usage: %s test | learn <rightCode> | reload [svm_train options...]"
        % args[0]
    )
    # Missing arguments previously raised IndexError; report usage instead.
    command = args[1] if len(args) > 1 else None

    if command == "test":
        code = recognCaptcha(svg, "xy")
        print(asciiDecode(code))
    elif command == "learn":
        if len(args) < 3:
            sys.exit(usage)
        # args[2] holds the correct characters, one per crop ("o" = skip).
        learn("xy", args[2], [1, 2, 3, 4])
        m = svm_load_model('captcha.model')  # load the model
        # Re-run prediction on the data file left by the last "test" run.
        y, x = svm_read_problem("temp\\xy_data.txt")
        p_label, p_acc, p_val = svm_predict(y, x, m)
        print(asciiDecode(p_label))
    elif command == "reload":
        # Pass the options as one string ("" when none are given).
        # NOTE(review): svm_train documents an option string; list support
        # appears to depend on the LIBSVM release -- confirm.
        config = " ".join(args[2:])
        y, x = svm_read_problem("train_data.txt")
        m = svm_train(y, x, config)  # best known options: -t 0 -c 4 -b 1
        svm_save_model('captcha.model', m)  # save the model
    else:
        # Unknown or missing command: previously a silent no-op or a crash.
        sys.exit(usage)