# Baseline: hold out a test split and instantiate an untuned XGBoost classifier.
# NOTE(review): train_test_split's default test_size (0.25) is used — confirm intended.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=50)
xgb = XGBClassifier()
# ----------------------------------------------------------------------
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search space for the XGBoost classifier.
parameters = [{
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9],
}]

# Exhaustive 10-fold grid search over the space above, using all CPU cores.
gscv = GridSearchCV(xgb, parameters, scoring='accuracy', n_jobs=-1, cv=10)
grid_search = gscv.fit(x, y)
grid_search.best_params_  # inspect the winning hyper-parameter combination
# ----------------------------------------------------------------------
# Retrain with the best hyper-parameters found by the grid search and
# evaluate on the held-out test split.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=50)
xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)
# BUG FIX: fit on the TRAINING split only. The original fit on (x, y), which
# leaked the test rows into training and inflated the reported accuracy.
xgb.fit(x_train, y_train)
pred = xgb.predict(x_test)
print('Accuracy= ', accuracy_score(y_test, pred))
# -----------------------------------------------------------------------
# Cross-validate the classifier; the model giving the highest mean
# cross_val_score is chosen as the final model.
from sklearn.model_selection import cross_val_predict

xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)
cvs = cross_val_score(xgb, x, y, scoring='accuracy', cv=10)
print('cross_val_scores= ', cvs.mean())

# Out-of-fold predictions for every sample, for a leakage-free confusion matrix.
y_pred = cross_val_predict(xgb, x, y, cv=10)
# BUG FIX: confusion_matrix expects (y_true, y_pred); the original passed the
# arguments swapped, which transposes the matrix whenever errors are asymmetric.
conf_mat = confusion_matrix(y, y_pred)
conf_mat
# ---------------------------------------------------------------------------
# Cross-validate the regressor (r2 scoring); the model giving the highest
# mean score is chosen as the final model.
gbm = GradientBoostingRegressor(max_depth=7, min_samples_leaf=1, n_estimators=100)
# BUG FIX: score the freshly configured `gbm` — the original passed the
# earlier classifier `xgb` by mistake, so the regressor was never evaluated.
cvs = cross_val_score(gbm, x, y, scoring='r2', cv=5)
print('cross_val_scores= ', cvs.mean())
# -------------------------------------------------------------------------------
# Reference cheat-sheet: hyper-parameter grids per model family.
# NOTE(review): each assignment overwrites `parameters` — these are notes to
# copy from, not code meant to run top-to-bottom.
# xgboost
parameters = [{'learning_rate': [0.1, 0.2, 0.3, 0.4],
               'max_depth': [3, 4, 5, 6, 7, 8],
               'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9]}]
# random forest
parameters = [{'max_depth': [5, 7, 9, 10], 'min_samples_leaf': [1, 2],
               'n_estimators': [100, 250, 500]}]
# gradient boosting
parameters = [{'max_depth': [5, 7, 9, 10], 'min_samples_leaf': [1, 2],
               'n_estimators': [100, 250, 500]}]
# k-nearest neighbours
parameters = {'n_neighbors': [5, 6, 8, 10, 12, 14, 15]}
# logistic regression
parameters = {'penalty': ['l1', 'l2'], 'C': [1, 2, 3, 4, 5]}
# gaussian naive bayes
parameters = {'var_smoothing': np.logspace(0, -9, num=100)}
# SVC
parameters = [{'C': [0.1, 0.5, 1, 2, 3], 'kernel': ['rbf', 'poly']}]
# adaboost — assumes `lr` is a base estimator defined elsewhere; TODO confirm
parameters = [{'base_estimator': [lr], 'learning_rate': [1, 0.1, 0.001],
               'n_estimators': [100, 150, 250]}]
# decision tree
parameters = [{'criterion': ['gini', 'entropy'], 'max_depth': [5, 7, 9, 10],
               'min_samples_leaf': [1, 2]}]
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# BUG FIX: MultinomialNB, Pipeline, TfidfVectorizer and numpy were used below
# but never imported in the original snippet.
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Hold out 20% of the corpus for the final score of each tuned pipeline.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display names, estimators and per-estimator grids are kept in lockstep:
# names[i] / classifiers[i] / parameters[i] describe one candidate model.
names = [
    "Naive Bayes",
    "Linear SVM",
    "Logistic Regression",
    "Random Forest",
    "Multilayer Perceptron",
]

classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
    MLPClassifier(),
]

parameters = [
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__alpha': (1e-2, 1e-3)},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__C': (np.logspace(-5, 1, 5))},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__C': (np.logspace(-5, 1, 5))},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__max_depth': (1, 2)},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__alpha': (1e-2, 1e-3)},
]

# Tune each TF-IDF + classifier pipeline with grid search, then report its
# accuracy on the held-out test split.
for name, classifier, params in zip(names, classifiers, parameters):
    clf_pipe = Pipeline([
        ('vect', TfidfVectorizer(stop_words='english')),
        ('clf', classifier),
    ])
    gs_clf = GridSearchCV(clf_pipe, param_grid=params, n_jobs=-1)
    clf = gs_clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("{} score: {}".format(name, score))

# Minimal GridSearchCV example (adapted from the scikit-learn docs); the
# original was a pasted REPL transcript — converted to runnable statements,
# with the expected output preserved in comments.
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(iris.data, iris.target)
# repr: GridSearchCV(estimator=SVC(),
#                    param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
sorted(clf.cv_results_.keys())
# ['mean_fit_time', 'mean_score_time', 'mean_test_score', ...,
#  'param_C', 'param_kernel', 'params', ...,
#  'rank_test_score', 'split0_test_score', ...,
#  'split2_test_score', ...,
#  'std_fit_time', 'std_score_time', 'std_test_score']