sklearn Study Notes


Model validation methods:

1. Learning curve (learning_curve)
2. Cross-validation score (cross_val_score)
3. Validation curve (validation_curve)

I. Learning curve

Computes a given estimator's cross-validated training and test scores on training sets of different sizes. First, a cross-validation generator splits the whole dataset K times, and each split yields a training set and a test set. From each split's training set, subsets of steadily increasing size are drawn and the model is trained on each of them. The model's score is then computed on each training subset and on the corresponding test set. Finally, for every training-subset size, the K training scores and the K test scores are averaged separately. The example below plots learning curves on the digits dataset for a naive Bayes classifier and for an SVM with an RBF kernel.

import numpy as np
from sklearn.model_selection import learning_curve, ShuffleSplit
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
import matplotlib.pyplot as plt


def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_size=np.linspace(.1, 1.0, 5)):
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_size)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()  # draw the background grid
    # shaded bands: mean +/- one standard deviation over the K splits
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, "o-", color="r", label="Training score")
    plt.plot(train_sizes, test_scores_mean, "o-", color="g", label="Cross-validation score")
    plt.legend(loc="best")
    return plt


if __name__ == "__main__":
    digits = load_digits()
    X = digits.data
    y = digits.target
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)  # 100 random train/test splits
    estimator = GaussianNB()
    title = "Learning Curves (Naive Bayes)"
    plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4)

    title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
    # pass a custom CV splitter instead of the default k-fold strategy
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    estimator = svm.SVC(gamma=0.001)
    plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4)
    plt.show()
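To make the averaging step concrete, here is a minimal sketch that is not part of the original notes: it calls learning_curve directly (assuming 5 training-set sizes and 5-fold CV, chosen only for illustration) and prints the raw score arrays. Each row corresponds to one training-set size and each column to one CV split, so the plotted curves above are the row-wise means.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(return_X_y=True)
# assumed settings: 5 training-set sizes, 5-fold cross-validation
train_sizes, train_scores, test_scores = learning_curve(
    GaussianNB(), X, y, cv=5, train_sizes=np.linspace(0.1, 1.0, 5))

print(train_sizes)               # absolute number of training samples at each step
print(train_scores.shape)        # (5, 5): one row per size, one column per CV split
print(test_scores.mean(axis=1))  # mean test score per training-set size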

II. Cross-validation score

cross_val_score evaluates an estimator by cross-validation and returns one score per fold. The example below sweeps the penalty parameter C of a linear SVM on the digits dataset and plots the mean CV score together with a one-standard-deviation band.

import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn import datasets, svm

digits = datasets.load_digits()
x = digits.data
y = digits.target
vsc = svm.SVC(kernel="linear")

if __name__ == "__main__":
    c_S = np.logspace(-10, 0, 10)  # 10 values of C, evenly spaced on a log scale
    # print("length", len(c_S))
    scores = list()
    scores_std = list()
    for c in c_S:
        vsc.C = c
        # n_jobs=4 runs the folds in parallel; with no cv argument the library's
        # default splitting is used (3-fold here, 5-fold in newer scikit-learn)
        this_scores = cross_val_score(vsc, x, y, n_jobs=4)
        scores.append(np.mean(this_scores))
        scores_std.append(np.std(this_scores))
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.semilogx(c_S, scores)  # mean CV score
    plt.semilogx(c_S, np.array(scores) + np.array(scores_std), "b-")
    plt.semilogx(c_S, np.array(scores) - np.array(scores_std), "b-")
    locs, labels = plt.yticks()
    plt.yticks(locs, list(map(lambda X: "%g" % X, locs)))  # reformat the y tick labels
    plt.ylabel("CV score")
    plt.xlabel("parameter C")
    plt.ylim(0, 1.1)  # y-axis range
    plt.show()
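The loop above leaves the splitting strategy and scoring metric at their defaults. As a hedged sketch, not part of the original notes, both can be passed explicitly; here a ShuffleSplit strategy like the one from the learning-curve example is assumed, with C fixed at 1.0 purely for illustration.

import numpy as np
from sklearn import datasets, svm
from sklearn.model_selection import ShuffleSplit, cross_val_score

digits = datasets.load_digits()
clf = svm.SVC(kernel="linear", C=1.0)  # C=1.0 is an assumed value for illustration

# 10 random 80/20 splits instead of the default k-fold strategy
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
scores = cross_val_score(clf, digits.data, digits.target, cv=cv, scoring="accuracy")
print(scores)                       # one accuracy value per split
print(scores.mean(), scores.std())  # summary used when comparing parameter values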

III. Validation curve

As a single parameter is varied, compute the model's score on the training set and on the test set at each value of that parameter. Computing an estimator's scores over one varying parameter is similar to a grid search with a single parameter, except that this function also computes the scores on the training set. The example below sweeps the gamma parameter of an SVM on the digits dataset.

from sklearn import svm
from sklearn.model_selection import validation_curve
from sklearn.datasets import load_digits
import numpy as np
import matplotlib.pyplot as plt

digits = load_digits()
X = digits.data
y = digits.target
param_range = np.logspace(-6, -1, 5)
vsc = svm.SVC()
train_score, test_score = validation_curve(vsc, X, y, param_name="gamma",
                                            param_range=param_range, cv=10,
                                            scoring="accuracy", n_jobs=1)
train_score_mean = np.mean(train_score, axis=1)
train_score_std = np.std(train_score, axis=1)
test_score_mean = np.mean(test_score, axis=1)
test_score_std = np.std(test_score, axis=1)
plt.title("Validation Curve with SVM")
plt.xlabel(r"$\gamma$")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
lw = 2
plt.semilogx(param_range, train_score_mean, label="Training score", color="darkorange", lw=lw)
plt.fill_between(param_range, train_score_mean - train_score_std,
                 train_score_mean + train_score_std, alpha=0.2, color="darkorange", lw=lw)
plt.semilogx(param_range, test_score_mean, label="Cross-validation score", color="navy", lw=lw)
plt.fill_between(param_range, test_score_mean - test_score_std,
                 test_score_mean + test_score_std, alpha=0.2, color="navy", lw=lw)
plt.legend(loc="best")
plt.show()
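The notes compare validation_curve to a one-parameter grid search. The sketch below is not from the original notes: it runs the same gamma sweep with GridSearchCV, with the settings simply assumed to mirror the example above, and requests return_train_score=True so that training scores are reported as well.

import numpy as np
from sklearn import svm
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV

X, y = load_digits(return_X_y=True)
param_range = np.logspace(-6, -1, 5)
# same one-parameter sweep as validation_curve above; GridSearchCV additionally
# refits the best model, and reports training scores only when asked to
search = GridSearchCV(svm.SVC(), {"gamma": param_range}, cv=10,
                      scoring="accuracy", return_train_score=True)
search.fit(X, y)
print(search.best_params_)                     # best gamma found by cross-validation
print(search.cv_results_["mean_train_score"])  # training scores, as in validation_curve
print(search.cv_results_["mean_test_score"])   # cross-validation scores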
