# 앙상블 모형 (투표, Voting)

홀수개 모형을 데이터에 적합시켜 좀더 똑똑한 기계학습 모형을 개발해보자

- 로지스틱 모형
- 의사결정 모형
- KNN 모형

## 환경설정

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, f1_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier as KNN

from sklearn.ensemble import VotingClassifier

## 데이터셋

In [2]:
cancer_df = pd.read_csv('data/breast_cancer.csv')

# list(cancer_df.columns)
y = cancer_df[['diagnosis']]
X = cancer_df.loc[:, 'radius_mean':'fractal_dimension_worst']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=1)
y_train = np.ravel(y_train,order='C') # KNN : A column-vector y was passed when a 1d array was expected

## 기계학습 - CV


In [3]:
clf_lr = LogisticRegression(random_state = 777, solver='lbfgs', max_iter=1000)
clf_knn = KNN()
clf_dt = DecisionTreeClassifier(random_state = 777)
    
classifiers = [('Logistic Regression', clf_lr),
               ('K Nearest Neighbours', clf_knn),
               ('Classification Tree', clf_dt)]

for clf_name, clf in classifiers:
    # 모형적합
    clf.fit(X_train, y_train)
    # 시험데이터 예측
    y_pred = clf.predict(X_test)
    # 시험데이터 평가
    print('{:s} : {:.3f}'.format(clf_name,
                                 accuracy_score(y_test, y_pred)))

Logistic Regression : 0.974
K Nearest Neighbours : 0.921
Classification Tree : 0.939


## 예측 성능

In [4]:
vote_clf = VotingClassifier(estimators=classifiers)
vote_clf.fit(X_train, y_train)
y_pred_array = vote_clf.predict(X_test)
y_pred = pd.DataFrame(y_pred_array, columns = ['diagnosis']) # Arrary to Dataframe

print('Voting Classifier: {:.3f}'.format(f1_score(y_test, y_pred,  pos_label="M")))

Voting Classifier: 0.964


In [5]:
y_test.reset_index(drop=True, inplace=True)
y_pred.reset_index(drop=True, inplace=True)

pd.concat([y_test, y_pred], axis = 1)

Unnamed: 0,diagnosis,diagnosis.1
0,M,M
1,B,B
2,B,B
3,M,M
4,B,B
...,...,...
109,B,B
110,B,B
111,B,B
112,B,B
