# 앙상블 모형 (투표, Voting) : 연비 예측

홀수 개(여기서는 3개)의 모형을 데이터에 적합시켜 좀 더 똑똑한 기계학습 모형을 개발해보자. 회귀 문제이므로 투표(Voting)는 다수결이 아니라 각 모형 예측값의 평균으로 이루어진다.

- 선형회귀 모형
- 의사결정나무 모형
- SVM 모형

## 환경설정

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error as MSE

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.ensemble import VotingRegressor

## 데이터셋

In [2]:
# 2. Dataset
# Load the auto-mpg table, using the car name as the row index.
mpg_df = pd.read_csv('data/auto-mpg.csv', index_col='car name')
# Missing horsepower values are marked with the string '?'; drop those rows.
# .copy() yields an independent frame so the column assignment below does not
# raise pandas' SettingWithCopyWarning.
mpg_df = mpg_df[mpg_df.horsepower != '?'].copy()
# Because of the '?' markers the column is presumably read as strings (object
# dtype). Cast it explicitly so the estimators receive real numbers instead of
# relying on implicit string-to-float coercion inside scikit-learn.
mpg_df['horsepower'] = pd.to_numeric(mpg_df['horsepower'])

# 3. Train/test split
# Target: fuel economy (mpg). Features: every column from 'cylinders' to 'origin'.
y = mpg_df[['mpg']]
X = mpg_df.loc[:, 'cylinders':'origin']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=777)
# Flatten the single-column training target to a 1-D array before fitting.
y_train = np.ravel(y_train, order='C')

## 기계학습 - 투표


In [3]:
# Base regressors; the same (name, estimator) list is reused by the voting
# ensemble in the next cell.
reg_lr = LinearRegression()
# Seed the tree: scikit-learn permutes features at each split, so without a
# random_state the fitted tree (and its MSE) can differ between runs.
# 777 matches the seed used for the train/test split.
reg_dt = DecisionTreeRegressor(random_state=777)
# NOTE(review): RBF-kernel SVR is sensitive to feature scale; the features are
# passed unscaled here - consider standardizing them if SVM accuracy matters.
reg_svm = SVR(kernel='rbf')

regressors = [('Linear Regression', reg_lr),
              ('Decision Tree', reg_dt),
              ('SVM', reg_svm)]

for reg_name, reg in regressors:
    # Fit the model on the training split
    reg.fit(X_train, y_train)
    # Predict on the held-out test split
    y_pred = reg.predict(X_test)
    # Report the test-set mean squared error for this model
    print('{:s} : {:.3f}'.format(reg_name,
                                 MSE(y_test, y_pred)))

Linear Regression : 11.435
Decision Tree : 17.687
SVM : 14.808


## 예측 성능

In [4]:
# Combine the base regressors into one ensemble. Per the scikit-learn docs,
# VotingRegressor refits a clone of each estimator and averages their predictions.
vote_reg = VotingRegressor(estimators=regressors).fit(X_train, y_train)

# Score the ensemble on the held-out test split.
y_pred = vote_reg.predict(X_test)
vote_mse = MSE(y_test, y_pred)

print('Voting Regression: {:.3f}'.format(vote_mse))

Voting Regression: 9.880
