本文使用 sklearn 对手写数字数据集 digits 进行分类,并分别尝试了支持向量机(SVM)和随机森林两种分类器。由于每个样本只是一幅 8×8 的小图像,特征维度较低,使用 SVM 即可得到不错的准确率。

代码如下:
# Classify the scikit-learn handwritten-digits dataset with two models
# (an RBF-kernel SVM and a random forest) and print a per-class
# classification report for each on a held-out test split.
import sys

import pylab as pl
from sklearn.datasets import load_digits  # loader for the handwritten-digits data
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report  # prediction analysis tool
from sklearn.preprocessing import StandardScaler  # standardisation tool
from sklearn.svm import SVC, LinearSVC

try:
    # train/test splitting helper; lives here since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split

# Python 2 only: force the default str<->unicode codec to UTF-8.
# `reload` is not a builtin on Python 3 (and the hack is unnecessary there),
# so the call is guarded instead of being run unconditionally.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

digits = load_digits()
# Data dimensions (original author's note: 1797 images, 8*8 each).
print(digits.data.shape)

# Split the data: 20% held out for testing, fixed seed for a repeatable split.
X_train, X_test, Y_train, Y_test = train_test_split(
    digits.data, digits.target, test_size=0.20, random_state=33)

ss = StandardScaler()
# fit/fit_transform are instance methods, so they must be called on an instance.
# The scaler is fitted on the training data only and then applied to the test
# data, so no test-set statistics leak into training.
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# classification_report expects string labels for target_names.
target_names = digits.target_names.astype(str)

# Support vector machine.
# The original called `svm.SVC`, but no `svm` name was ever imported, which
# raised NameError; SVC is now imported directly from sklearn.svm.
csvm = SVC(gamma=0.020)
csvm.fit(X_train, Y_train)
Y_predict = csvm.predict(X_test)
print(classification_report(Y_test, Y_predict, target_names=target_names))

# Random forest.
clf = RandomForestClassifier(n_estimators=450)
clf = clf.fit(X_train, Y_train)
Y_predict = clf.predict(X_test)
print(classification_report(Y_test, Y_predict, target_names=target_names))

结果

SVM结果如下:

随机森林结果如下: