voting classifier with different features

Solutions on MaxInterview for voting classifier with different features by the best coders in the world

showing results for - "voting classifier with different features"
Walid
15 Feb 2019
1from sklearn.base import TransformerMixin, BaseEstimator
2import numpy as np
3from sklearn.pipeline import Pipeline
4from sklearn.linear_model import LogisticRegression
5from sklearn.svm import SVC
6from sklearn.datasets import load_iris
7from sklearn.model_selection import train_test_split
8from sklearn.ensemble import VotingClassifier
9
######################
# custom transformer for sklearn pipeline
class ColumnExtractor(TransformerMixin, BaseEstimator):
    """Pipeline step that keeps only a chosen subset of feature columns.

    Parameters
    ----------
    cols : iterable of int
        Column indices to keep, in the order they should appear in the
        output. Negative indices count back from the last column.
    """

    def __init__(self, cols):
        # Stored untouched; the iterable is only materialised in transform().
        self.cols = cols

    def fit(self, X, y=None):
        """Do nothing (the transformer is stateless) and return ``self``."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` listed in ``self.cols`` as a new array.

        A single integer-array index replaces the previous per-column
        ``X[:, c:c+1]`` slices, which produced an empty slice for negative
        indices (e.g. ``c=-1`` gave ``X[:, -1:0]``) and so silently dropped
        the requested column.
        """
        X = np.asarray(X)
        return X[:, list(self.cols)]
24
######################
# processing data
data = load_iris()
X = data.data
y = data.target
# Seed the shuffle so the train/test split -- and every score computed from
# it -- is the same on each run. The "output:" values quoted elsewhere in this
# file were recorded from an unseeded split, so they may not match exactly.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
31
######################
# fit clf1 with df1
# LogisticRegression (clf1) is trained on feature columns 0 and 1 only (df1).
pipe1 = Pipeline(steps=[
    ('col_extract', ColumnExtractor(cols=range(2))),
    ('clf', LogisticRegression()),
])

# Sanity check: fit and score this pipeline on its own.
pipe1.fit(X_train, y_train)
pipe1.score(X_test, y_test)
# output: 0.6842105263157895  (recorded for one particular train/test split)
42
######################
# fit clf2 with df2
# SVC (clf2) is trained on feature columns 2 and 3 only (df2).
# probability=True is set because the ensemble in this file uses voting='soft'.
pipe2 = Pipeline(steps=[
    ('col_extract', ColumnExtractor(cols=range(2, 4))),
    ('clf', SVC(probability=True)),
])

# Sanity check: fit and score this pipeline on its own.
pipe2.fit(X_train, y_train)
pipe2.score(X_test, y_test)
# output: 0.9736842105263158  (recorded for one particular train/test split)
53
######################
# ensemble/voting classifier where clf1 fitted with df1 and clf2 fitted with df2
# Each member pipeline selects its own columns, so the ensemble is given the
# full feature matrix. Soft voting, with pipe1 weighted 1 and pipe2 weighted 0.5.
eclf = VotingClassifier(
    estimators=[('df1-clf1', pipe1), ('df2-clf2', pipe2)],
    voting='soft',
    weights=[1, 0.5],
)
eclf.fit(X_train, y_train)
eclf.score(X_test, y_test)
# output: 0.9473684210526315  (recorded for one particular train/test split)