참고: 핵심 설명과 코드는 🔑로 표시되었으며 굳이 알아둘 필요가 없는 코드는 ✋로 표시되었다.
# 파이썬 ≥3.5
import sys
assert sys.version_info >= (3, 5)
# 사이킷런 ≥0.20
import sklearn
assert sklearn.__version__ >= "0.20"
# 공통 모듈 임포트
import numpy as np
import os
# 깔금한 그래프 출력을 위해
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
# 그림 저장 위치 지정
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("그림 저장:", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
import os
import tarfile
import urllib.request
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/codingalzi/handson-ml2/master/"
HOUSING_PATH = os.path.join("datasets", "housing")
HOUSING_URL = DOWNLOAD_ROOT + "notebooks/datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    housing_tgz = tarfile.open(tgz_path)
    housing_tgz.extractall(path=housing_path)
    housing_tgz.close()
fetch_housing_data()
import pandas as pd
def load_housing_data(housing_path=HOUSING_PATH):
    csv_path = os.path.join(housing_path, "housing.csv")
    return pd.read_csv(csv_path)
housing = load_housing_data()
housing.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | ocean_proximity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -122.23 | 37.88 | 41.0 | 880.0 | 129.0 | 322.0 | 126.0 | 8.3252 | 452600.0 | NEAR BAY | 
| 1 | -122.22 | 37.86 | 21.0 | 7099.0 | 1106.0 | 2401.0 | 1138.0 | 8.3014 | 358500.0 | NEAR BAY | 
| 2 | -122.24 | 37.85 | 52.0 | 1467.0 | 190.0 | 496.0 | 177.0 | 7.2574 | 352100.0 | NEAR BAY | 
| 3 | -122.25 | 37.85 | 52.0 | 1274.0 | 235.0 | 558.0 | 219.0 | 5.6431 | 341300.0 | NEAR BAY | 
| 4 | -122.25 | 37.85 | 52.0 | 1627.0 | 280.0 | 565.0 | 259.0 | 3.8462 | 342200.0 | NEAR BAY | 
housing.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 20640 entries, 0 to 20639 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 longitude 20640 non-null float64 1 latitude 20640 non-null float64 2 housing_median_age 20640 non-null float64 3 total_rooms 20640 non-null float64 4 total_bedrooms 20433 non-null float64 5 population 20640 non-null float64 6 households 20640 non-null float64 7 median_income 20640 non-null float64 8 median_house_value 20640 non-null float64 9 ocean_proximity 20640 non-null object dtypes: float64(9), object(1) memory usage: 1.6+ MB
housing["ocean_proximity"].value_counts()
<1H OCEAN 9136 INLAND 6551 NEAR OCEAN 2658 NEAR BAY 2290 ISLAND 5 Name: ocean_proximity, dtype: int64
housing.describe()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | |
|---|---|---|---|---|---|---|---|---|---|
| count | 20640.000000 | 20640.000000 | 20640.000000 | 20640.000000 | 20433.000000 | 20640.000000 | 20640.000000 | 20640.000000 | 20640.000000 | 
| mean | -119.569704 | 35.631861 | 28.639486 | 2635.763081 | 537.870553 | 1425.476744 | 499.539680 | 3.870671 | 206855.816909 | 
| std | 2.003532 | 2.135952 | 12.585558 | 2181.615252 | 421.385070 | 1132.462122 | 382.329753 | 1.899822 | 115395.615874 | 
| min | -124.350000 | 32.540000 | 1.000000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 | 0.499900 | 14999.000000 | 
| 25% | -121.800000 | 33.930000 | 18.000000 | 1447.750000 | 296.000000 | 787.000000 | 280.000000 | 2.563400 | 119600.000000 | 
| 50% | -118.490000 | 34.260000 | 29.000000 | 2127.000000 | 435.000000 | 1166.000000 | 409.000000 | 3.534800 | 179700.000000 | 
| 75% | -118.010000 | 37.710000 | 37.000000 | 3148.000000 | 647.000000 | 1725.000000 | 605.000000 | 4.743250 | 264725.000000 | 
| max | -114.310000 | 41.950000 | 52.000000 | 39320.000000 | 6445.000000 | 35682.000000 | 6082.000000 | 15.000100 | 500001.000000 | 
%matplotlib inline
import matplotlib.pyplot as plt
housing.hist(bins=50, figsize=(20,15))
save_fig("attribute_histogram_plots")
plt.show()
그림 저장: attribute_histogram_plots
# 노트북의 실행 결과가 동일하도록
np.random.seed(42)
import numpy as np
# 예시 용도로 만든 훈련 세트/테스트 세트 분류 함수. 실전용 아님.
def split_train_test(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
train_set, test_set = split_train_test(housing, 0.2)
len(train_set)
16512
len(test_set)
4128
len(test_set) / len(housing)
0.2
split_train_test_by_id() 함수zlib.crc32() 함수: 파일의 체크섬을 CRC 방식으로 계산한 32비트 정수 반환0xffffffff: 32비트 정수 중에서 가장 큰 정수, 즉 2**32 - 1.test_ratio * 2**32: 32비트 정수 중에서 test_ratio 비율에 해당하는 정수
  예를 들어, test_ratio = 0.2 이며, 하위 20%에 해당하는 정수.&: 이진 논리곱(binary AND)이라는 비트 연산자.0xffffffff와의 비트 연산을 통해 2**32 보다 작은 값으로 제한하기 위해 사용됨. 
  하지만 zlib.crc32() 함수가 32비트 정수를 반환하기에 굳이 사용할 필요 없음.from zlib import crc32
def test_set_check(identifier, test_ratio):
    return crc32(np.int64(identifier)) & 0xffffffff < test_ratio * 2**32
def split_train_test_by_id(data, test_ratio, id_column):
    ids = data[id_column]
    in_test_set = ids.apply(lambda id_: test_set_check(id_, test_ratio))
    return data.loc[~in_test_set], data.loc[in_test_set]
housing_with_id = housing.reset_index()   # `index` 열이 추가된 데이터프레임 반환
train_set, test_set = split_train_test_by_id(housing_with_id, 0.2, "index")
housing_with_id["id"] = housing["longitude"] * 1000 + housing["latitude"]
train_set, test_set = split_train_test_by_id(housing_with_id, 0.2, "id")
test_set.head()
| index | longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | ocean_proximity | id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 59 | 59 | -122.29 | 37.82 | 2.0 | 158.0 | 43.0 | 94.0 | 57.0 | 2.5625 | 60000.0 | NEAR BAY | -122252.18 | 
| 60 | 60 | -122.29 | 37.83 | 52.0 | 1121.0 | 211.0 | 554.0 | 187.0 | 3.3929 | 75700.0 | NEAR BAY | -122252.17 | 
| 61 | 61 | -122.29 | 37.82 | 49.0 | 135.0 | 29.0 | 86.0 | 23.0 | 6.1183 | 75000.0 | NEAR BAY | -122252.18 | 
| 62 | 62 | -122.29 | 37.81 | 50.0 | 760.0 | 190.0 | 377.0 | 122.0 | 0.9011 | 86100.0 | NEAR BAY | -122252.19 | 
| 67 | 67 | -122.29 | 37.80 | 52.0 | 1027.0 | 244.0 | 492.0 | 147.0 | 2.6094 | 81300.0 | NEAR BAY | -122252.20 | 
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)
test_set.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | ocean_proximity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 20046 | -119.01 | 36.06 | 25.0 | 1505.0 | NaN | 1392.0 | 359.0 | 1.6812 | 47700.0 | INLAND | 
| 3024 | -119.46 | 35.14 | 30.0 | 2943.0 | NaN | 1565.0 | 584.0 | 2.5313 | 45800.0 | INLAND | 
| 15663 | -122.44 | 37.80 | 52.0 | 3830.0 | NaN | 1310.0 | 963.0 | 3.4801 | 500001.0 | NEAR BAY | 
| 20484 | -118.72 | 34.28 | 17.0 | 3051.0 | NaN | 1705.0 | 495.0 | 5.7376 | 218600.0 | <1H OCEAN | 
| 9814 | -121.93 | 36.62 | 34.0 | 2351.0 | NaN | 1063.0 | 428.0 | 3.7250 | 278000.0 | NEAR OCEAN | 
housing["median_income"].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f1c25601550>
대부분 구역의 중간 소득이 1.5~6.0(15,000~60,000$) 사이
소득 구간을 아래 숫자를 기준으로 5개로 구분
[0, 1.5, 3.0, 4.6, 6.0, np,inf]
housing["income_cat"] = pd.cut(housing["median_income"],
                               bins=[0., 1.5, 3.0, 4.5, 6., np.inf],
                               labels=[1, 2, 3, 4, 5])
housing["income_cat"].value_counts()
3 7236 2 6581 4 3639 5 2362 1 822 Name: income_cat, dtype: int64
housing["income_cat"].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f1c247c3d90>
housing["income_cat"] 기준from sklearn.model_selection import StratifiedShuffleSplit
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(housing, housing["income_cat"]):
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]
strat_test_set["income_cat"].value_counts() / len(strat_test_set)
3 0.350533 2 0.318798 4 0.176357 5 0.114583 1 0.039729 Name: income_cat, dtype: float64
housing["income_cat"].value_counts() / len(housing)
3 0.350581 2 0.318847 4 0.176308 5 0.114438 1 0.039826 Name: income_cat, dtype: float64
def income_cat_proportions(data):
    return data["income_cat"].value_counts() / len(data)
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)
compare_props = pd.DataFrame({
    "Overall": income_cat_proportions(housing),
    "Stratified": income_cat_proportions(strat_test_set),
    "Random": income_cat_proportions(test_set),
}).sort_index()
compare_props["Rand. %error"] = 100 * compare_props["Random"] / compare_props["Overall"] - 100
compare_props["Strat. %error"] = 100 * compare_props["Stratified"] / compare_props["Overall"] - 100
compare_props
| Overall | Stratified | Random | Rand. %error | Strat. %error | |
|---|---|---|---|---|---|
| 1 | 0.039826 | 0.039729 | 0.040213 | 0.973236 | -0.243309 | 
| 2 | 0.318847 | 0.318798 | 0.324370 | 1.732260 | -0.015195 | 
| 3 | 0.350581 | 0.350533 | 0.358527 | 2.266446 | -0.013820 | 
| 4 | 0.176308 | 0.176357 | 0.167393 | -5.056334 | 0.027480 | 
| 5 | 0.114438 | 0.114583 | 0.109496 | -4.318374 | 0.127011 | 
income_cat 특성 삭제for set_ in (strat_train_set, strat_test_set):
    set_.drop("income_cat", axis=1, inplace=True)
훈련세트 원본을 그대로 두고 복사해서 사용한다.
housing = strat_train_set.copy()
housing.plot(kind="scatter", x="longitude", y="latitude")
save_fig("bad_visualization_plot")
그림 저장: bad_visualization_plot
alpha 키워드 인자 활용housing.plot(kind="scatter", x="longitude", y="latitude", alpha=0.1)
save_fig("better_visualization_plot")
그림 저장: better_visualization_plot
sharex=False : x-축의 값과 범례를 표시하지 못하는 버그 수정 용도이며 임시 방편 해결책임
housing.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
             s=housing["population"]/100, label="population", figsize=(10,7),
             c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
             sharex=False)
plt.legend()
save_fig("housing_prices_scatterplot")
그림 저장: housing_prices_scatterplot
# 캘리포니아 지도 다운로드
images_path = os.path.join(PROJECT_ROOT_DIR, "images", "end_to_end_project")
os.makedirs(images_path, exist_ok=True)
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
filename = "california.png"
print("Downloading", filename)
url = DOWNLOAD_ROOT + "images/end_to_end_project/" + filename
urllib.request.urlretrieve(url, os.path.join(images_path, filename))
Downloading california.png
('./images/end_to_end_project/california.png',
 <http.client.HTTPMessage at 0x7f1c23de6d90>)
import matplotlib.image as mpimg
california_img=mpimg.imread(os.path.join(images_path, filename))
ax = housing.plot(kind="scatter", x="longitude", y="latitude", figsize=(10,7),
                  s=housing['population']/100, label="Population",
                  c="median_house_value", cmap=plt.get_cmap("jet"),
                  colorbar=False, alpha=0.4)
plt.imshow(california_img, extent=[-124.55, -113.80, 32.45, 42.05], alpha=0.5,
           cmap=plt.get_cmap("jet"))
plt.ylabel("Latitude", fontsize=14)
plt.xlabel("Longitude", fontsize=14)
prices = housing["median_house_value"]
tick_values = np.linspace(prices.min(), prices.max(), 11)
cbar = plt.colorbar(ticks=tick_values/prices.max())
cbar.ax.set_yticklabels(["$%dk"%(round(v/1000)) for v in tick_values], fontsize=14)
cbar.set_label('Median House Value', fontsize=16)
plt.legend(fontsize=16)
save_fig("california_housing_prices_plot")
plt.show()
그림 저장: california_housing_prices_plot
corr_matrix = housing.corr()
corr_matrix["median_house_value"].sort_values(ascending=False)
median_house_value 1.000000 median_income 0.687160 total_rooms 0.135097 housing_median_age 0.114110 households 0.064506 total_bedrooms 0.047689 population -0.026920 longitude -0.047432 latitude -0.142724 Name: median_house_value, dtype: float64
<그림 출처: 위키백과>
# from pandas.tools.plotting import scatter_matrix # 옛날 버전의 판다스에서는
from pandas.plotting import scatter_matrix
attributes = ["median_house_value", "median_income", "total_rooms",
              "housing_median_age"]
scatter_matrix(housing[attributes], figsize=(12, 8))
save_fig("scatter_matrix_plot")
그림 저장: scatter_matrix_plot
housing.plot(kind="scatter", x="median_income", y="median_house_value",
             alpha=0.1)
plt.axis([0, 16, 0, 550000])
save_fig("income_vs_house_value_scatterplot")
그림 저장: income_vs_house_value_scatterplot
구역별 방의 총 개수와 침실의 총 개수 대신 아래 특성이 보다 유용하다.
housing["rooms_per_household"] = housing["total_rooms"]/housing["households"]
housing["bedrooms_per_room"] = housing["total_bedrooms"]/housing["total_rooms"]
housing["population_per_household"]=housing["population"]/housing["households"]
corr_matrix = housing.corr()
corr_matrix["median_house_value"].sort_values(ascending=False)
median_house_value 1.000000 median_income 0.687160 rooms_per_household 0.146285 total_rooms 0.135097 housing_median_age 0.114110 households 0.064506 total_bedrooms 0.047689 population_per_household -0.021985 population -0.026920 longitude -0.047432 latitude -0.142724 bedrooms_per_room -0.259984 Name: median_house_value, dtype: float64
가구당 방 개수의 역할은 여전히 미미하다.
housing.plot(kind="scatter", x="rooms_per_household", y="median_house_value",
             alpha=0.2)
plt.show()
housing.describe()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value | rooms_per_household | bedrooms_per_room | population_per_household | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 16512.000000 | 16512.000000 | 16512.000000 | 16512.000000 | 16354.000000 | 16512.000000 | 16512.000000 | 16512.000000 | 16512.000000 | 16512.000000 | 16354.000000 | 16512.000000 | 
| mean | -119.575834 | 35.639577 | 28.653101 | 2622.728319 | 534.973890 | 1419.790819 | 497.060380 | 3.875589 | 206990.920724 | 5.440341 | 0.212878 | 3.096437 | 
| std | 2.001860 | 2.138058 | 12.574726 | 2138.458419 | 412.699041 | 1115.686241 | 375.720845 | 1.904950 | 115703.014830 | 2.611712 | 0.057379 | 11.584826 | 
| min | -124.350000 | 32.540000 | 1.000000 | 6.000000 | 2.000000 | 3.000000 | 2.000000 | 0.499900 | 14999.000000 | 1.130435 | 0.100000 | 0.692308 | 
| 25% | -121.800000 | 33.940000 | 18.000000 | 1443.000000 | 295.000000 | 784.000000 | 279.000000 | 2.566775 | 119800.000000 | 4.442040 | 0.175304 | 2.431287 | 
| 50% | -118.510000 | 34.260000 | 29.000000 | 2119.500000 | 433.000000 | 1164.000000 | 408.000000 | 3.540900 | 179500.000000 | 5.232284 | 0.203031 | 2.817653 | 
| 75% | -118.010000 | 37.720000 | 37.000000 | 3141.000000 | 644.000000 | 1719.250000 | 602.000000 | 4.744475 | 263900.000000 | 6.056361 | 0.239831 | 3.281420 | 
| max | -114.310000 | 41.950000 | 52.000000 | 39320.000000 | 6210.000000 | 35682.000000 | 5358.000000 | 15.000100 | 500001.000000 | 141.909091 | 1.000000 | 1243.333333 | 
훈련세트를 대상으로 중간 주택 가격을 타깃값(레이블)로 사용하기 위해 다른 특성으로 분리한다.
housing: 훈련에 사용되는 특성 9개 (중간 주택 가격 제외)housing_labels: 중간 주택 가격housing = strat_train_set.drop("median_house_value", axis=1)
housing_labels = strat_train_set["median_house_value"].copy()
total_bedrooms 특성에 존재하는 누락치 처리법 결정¶sample_incomplete_rows = housing[housing.isnull().any(axis=1)].head()
sample_incomplete_rows
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | ocean_proximity | |
|---|---|---|---|---|---|---|---|---|---|
| 4629 | -118.30 | 34.07 | 18.0 | 3759.0 | NaN | 3296.0 | 1462.0 | 2.2708 | <1H OCEAN | 
| 6068 | -117.86 | 34.01 | 16.0 | 4632.0 | NaN | 3038.0 | 727.0 | 5.1762 | <1H OCEAN | 
| 17923 | -121.97 | 37.35 | 30.0 | 1955.0 | NaN | 999.0 | 386.0 | 4.6328 | <1H OCEAN | 
| 13656 | -117.30 | 34.05 | 6.0 | 2155.0 | NaN | 1039.0 | 391.0 | 1.6675 | INLAND | 
| 19252 | -122.79 | 38.48 | 7.0 | 6837.0 | NaN | 3468.0 | 1405.0 | 3.1662 | <1H OCEAN | 
sample_incomplete_rows.dropna(subset=["total_bedrooms"])    # 옵션 1
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | ocean_proximity | 
|---|
sample_incomplete_rows.drop("total_bedrooms", axis=1)       # 옵션 2
| longitude | latitude | housing_median_age | total_rooms | population | households | median_income | ocean_proximity | |
|---|---|---|---|---|---|---|---|---|
| 4629 | -118.30 | 34.07 | 18.0 | 3759.0 | 3296.0 | 1462.0 | 2.2708 | <1H OCEAN | 
| 6068 | -117.86 | 34.01 | 16.0 | 4632.0 | 3038.0 | 727.0 | 5.1762 | <1H OCEAN | 
| 17923 | -121.97 | 37.35 | 30.0 | 1955.0 | 999.0 | 386.0 | 4.6328 | <1H OCEAN | 
| 13656 | -117.30 | 34.05 | 6.0 | 2155.0 | 1039.0 | 391.0 | 1.6675 | INLAND | 
| 19252 | -122.79 | 38.48 | 7.0 | 6837.0 | 3468.0 | 1405.0 | 3.1662 | <1H OCEAN | 
median = housing["total_bedrooms"].median()
sample_incomplete_rows["total_bedrooms"].fillna(median, inplace=True) # 옵션 3
여기서는 옵션 3을 활용한다. 즉, 누락치를 특성 중앙값으로 채운다.
sample_incomplete_rows
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | ocean_proximity | |
|---|---|---|---|---|---|---|---|---|---|
| 4629 | -118.30 | 34.07 | 18.0 | 3759.0 | 433.0 | 3296.0 | 1462.0 | 2.2708 | <1H OCEAN | 
| 6068 | -117.86 | 34.01 | 16.0 | 4632.0 | 433.0 | 3038.0 | 727.0 | 5.1762 | <1H OCEAN | 
| 17923 | -121.97 | 37.35 | 30.0 | 1955.0 | 433.0 | 999.0 | 386.0 | 4.6328 | <1H OCEAN | 
| 13656 | -117.30 | 34.05 | 6.0 | 2155.0 | 433.0 | 1039.0 | 391.0 | 1.6675 | INLAND | 
| 19252 | -122.79 | 38.48 | 7.0 | 6837.0 | 433.0 | 3468.0 | 1405.0 | 3.1662 | <1H OCEAN | 
SimpleImputer 변환기¶SimpleImputer 변환기를 활용하면 옵션 3을 쉽게 처리할 수 있다.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="median")
중앙값이 수치형 특성에서만 계산될 수 있기 때문에 범주형 특성을 제거한 후에 SimpleImputer 변환기를 적용해야 한다.
housing_num = housing.drop("ocean_proximity", axis=1)
# 다른 방법: housing_num = housing.select_dtypes(include=[np.number])
SimpleImputer 변환기의 fit() 메서드는 지정된 통계 특성값을 계산하여 저장해 둔다.
statistics_ 속성에 저장됨.imputer.fit(housing_num)
SimpleImputer(add_indicator=False, copy=True, fill_value=None,
              missing_values=nan, strategy='median', verbose=0)
imputer.statistics_
array([-118.51  ,   34.26  ,   29.    , 2119.5   ,  433.    , 1164.    ,
        408.    ,    3.5409])
각 특성의 중앙값이 수동으로 계산한 것과 동일하다.
housing_num.median().values
array([-118.51  ,   34.26  ,   29.    , 2119.5   ,  433.    , 1164.    ,
        408.    ,    3.5409])
transform() 메서드를 실행하여 수치형 특성을 변환한다.
housing_tr: 수치형 특성의 누락치가 채워진 (훈련) 데이터셋X = imputer.transform(housing_num)
housing_tr = pd.DataFrame(X, columns=housing_num.columns,
                          index=housing_num.index)
앞서 누락치를 수동으로 채운 결과와 동일함을 확인할 수 있다.
housing_tr.loc[sample_incomplete_rows.index.values]
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | |
|---|---|---|---|---|---|---|---|---|
| 4629 | -118.30 | 34.07 | 18.0 | 3759.0 | 433.0 | 3296.0 | 1462.0 | 2.2708 | 
| 6068 | -117.86 | 34.01 | 16.0 | 4632.0 | 433.0 | 3038.0 | 727.0 | 5.1762 | 
| 17923 | -121.97 | 37.35 | 30.0 | 1955.0 | 433.0 | 999.0 | 386.0 | 4.6328 | 
| 13656 | -117.30 | 34.05 | 6.0 | 2155.0 | 433.0 | 1039.0 | 391.0 | 1.6675 | 
| 19252 | -122.79 | 38.48 | 7.0 | 6837.0 | 433.0 | 3468.0 | 1405.0 | 3.1662 | 
누락치를 채우는데 사용한 전략이 중앙값(median)임을 다시 확인할 수 있다.
imputer.strategy
'median'
housing_tr.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | |
|---|---|---|---|---|---|---|---|---|
| 17606 | -121.89 | 37.29 | 38.0 | 1568.0 | 351.0 | 710.0 | 339.0 | 2.7042 | 
| 18632 | -121.93 | 37.05 | 14.0 | 679.0 | 108.0 | 306.0 | 113.0 | 6.4214 | 
| 14650 | -117.20 | 32.77 | 31.0 | 1952.0 | 471.0 | 936.0 | 462.0 | 2.8621 | 
| 3230 | -119.61 | 36.31 | 25.0 | 1847.0 | 371.0 | 1460.0 | 353.0 | 1.8839 | 
| 3555 | -118.59 | 34.23 | 17.0 | 6592.0 | 1525.0 | 4459.0 | 1463.0 | 3.0347 | 
ocean_proximity 전처리 하기: 원-핫 인코딩 활용¶해안 근접도(ocean_proximity) 다시 확인
housing_cat = housing[["ocean_proximity"]]
housing_cat.head(10)
| ocean_proximity | |
|---|---|
| 17606 | <1H OCEAN | 
| 18632 | <1H OCEAN | 
| 14650 | NEAR OCEAN | 
| 3230 | INLAND | 
| 3555 | <1H OCEAN | 
| 19480 | INLAND | 
| 8879 | <1H OCEAN | 
| 13685 | INLAND | 
| 4937 | <1H OCEAN | 
| 4861 | <1H OCEAN | 
범주형 특성값을 단순하게 수치화하면 다음과 같다.
from sklearn.preprocessing import OrdinalEncoder
ordinal_encoder = OrdinalEncoder()
housing_cat_encoded = ordinal_encoder.fit_transform(housing_cat)
housing_cat_encoded[:10]
array([[0.],
       [0.],
       [4.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.]])
수치화된 번호는 범주들의 인덱스에 해당한다. 하지만 숫자의 크기가 모델 훈련 과정에 잘못된 영향을 줄 수 있음에 주의해야 한다.
ordinal_encoder.categories_
[array(['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN'],
       dtype=object)]
따라서 여기서는 대신에 원-핫-인코딩을 활용한다.
from sklearn.preprocessing import OneHotEncoder
cat_encoder = OneHotEncoder()
housing_cat_1hot = cat_encoder.fit_transform(housing_cat)
housing_cat_1hot
<16512x5 sparse matrix of type '<class 'numpy.float64'>' with 16512 stored elements in Compressed Sparse Row format>
OneHotEncoder 변환기¶toarray() 메서드: 희소 행렬을 밀집 배열로 변환housing_cat_1hot.toarray()
array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       ...,
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.]])
OneHotEncoder 변환기의 sparse=False 하이퍼파라미터로 지정하면 밀집 행렬이 생성된다.
cat_encoder = OneHotEncoder(sparse=False)
housing_cat_1hot = cat_encoder.fit_transform(housing_cat)
housing_cat_1hot
array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       ...,
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.]])
사용된 범주 어레이는 categories_ 속성에 저장된다.
cat_encoder.categories_
[array(['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN'],
       dtype=object)]
직접 변환기를 구현해야 한다.
__init__() 메서드: 방당 침실수 특성을 추가여부 지정 키워드 인자 활용. (연습을 위해 일부러 키워드 인자로 지정하였음.)fit()과 transform() 메서드를 갖는 클래스 선언fit() 메서드: 아무 것도 하지 않고 self 리턴. 즉, 아무런 값도 추정할 필요 없음.transform() 메서드: 속성 추가 기능 구현해야 함np.c_() 함수: 두 어레이를 열을 축으로 해서 이어 붙이기BaseEstimator 클래스와 TransformerMixin 클래스 상속BaseEstimator 클래스 상속: 하이퍼파라미터 튜닝에 필요한 get_params(), set_params() 메서드 자동 구현.
단, 초기 설정 메서드(__init__())가 *args 또는 kargs 형식의 인자를 사용하지 않아야 함.TransformerMixin 클래스 상속: fit_transform() 메서드 자동 구현from sklearn.base import BaseEstimator, TransformerMixin
# 열 인덱스
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    def __init__(self, add_bedrooms_per_room=True): # *args 또는 **kargs 없으며, 키워드 인자 활용함.
        self.add_bedrooms_per_room = add_bedrooms_per_room
    def fit(self, X, y=None):
        return self  # 아무것도 하지 않음
    def transform(self, X):
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household,
                         bedrooms_per_room]
        else:
            return np.c_[X, rooms_per_household, population_per_household]
attr_adder = CombinedAttributesAdder(add_bedrooms_per_room=False) # 연습 용도로 방당 침실수를 추가하지 않음
housing_extra_attribs = attr_adder.transform(housing.to_numpy())
특성 추가에 사용되는 기본의 특성들의 인덱스를 (3, 4, 5, 6) 처럼 수동으로 지정하는 것 대신에 아래와 같이 동적으로 처리하는 게 보다 좋다.
get_loc() 메서드 활용col_names = "total_rooms", "total_bedrooms", "population", "households"
rooms_ix, bedrooms_ix, population_ix, households_ix = [
    housing.columns.get_loc(c) for c in col_names] # 열 인덱스 구하기
housing_extra_attribs는 넘파이 어레이이며,
따라서 기존에 사용된 columns와 index를 이용하여 DataFrame으로 복원해야 한다.
housing_extra_attribs = pd.DataFrame(
    housing_extra_attribs,
    columns=list(housing.columns)+["rooms_per_household", "population_per_household"],
    index=housing.index)
housing_extra_attribs.head()
| longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | ocean_proximity | rooms_per_household | population_per_household | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 17606 | -121.89 | 37.29 | 38 | 1568 | 351 | 710 | 339 | 2.7042 | <1H OCEAN | 4.62537 | 2.0944 | 
| 18632 | -121.93 | 37.05 | 14 | 679 | 108 | 306 | 113 | 6.4214 | <1H OCEAN | 6.00885 | 2.70796 | 
| 14650 | -117.2 | 32.77 | 31 | 1952 | 471 | 936 | 462 | 2.8621 | NEAR OCEAN | 4.22511 | 2.02597 | 
| 3230 | -119.61 | 36.31 | 25 | 1847 | 371 | 1460 | 353 | 1.8839 | INLAND | 5.23229 | 4.13598 | 
| 3555 | -118.59 | 34.23 | 17 | 6592 | 1525 | 4459 | 1463 | 3.0347 | <1H OCEAN | 4.50581 | 3.04785 | 
여기서는 사이킷런의 StandardScaler 변환기를 이용하여 표준화 축척 조정(스케일링, scaling)을 사용한다.
Pipiline 객체 생성from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="median")),
        ('attribs_adder', CombinedAttributesAdder()),    # 방당 침실수 특성도 추가. 즉, 총 3개 특성 추가됨.
        ('std_scaler', StandardScaler()),
    ])
housing_num_tr = num_pipeline.fit_transform(housing_num)
housing_num_tr
array([[-1.15604281,  0.77194962,  0.74333089, ..., -0.31205452,
        -0.08649871,  0.15531753],
       [-1.17602483,  0.6596948 , -1.1653172 , ...,  0.21768338,
        -0.03353391, -0.83628902],
       [ 1.18684903, -1.34218285,  0.18664186, ..., -0.46531516,
        -0.09240499,  0.4222004 ],
       ...,
       [ 1.58648943, -0.72478134, -1.56295222, ...,  0.3469342 ,
        -0.03055414, -0.52177644],
       [ 0.78221312, -0.85106801,  0.18664186, ...,  0.02499488,
         0.06150916, -0.30340741],
       [-1.43579109,  0.99645926,  1.85670895, ..., -0.22852947,
        -0.09586294,  0.10180567]])
ColumnTransformer 변환기: 특성별 파이프라인 변환기를 지정 가능from sklearn.compose import ColumnTransformer
num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]
full_pipeline = ColumnTransformer([
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ])
housing_prepared = full_pipeline.fit_transform(housing)
모든 전처리 결과는 다음과 같다.
housing_prepared
array([[-1.15604281,  0.77194962,  0.74333089, ...,  0.        ,
         0.        ,  0.        ],
       [-1.17602483,  0.6596948 , -1.1653172 , ...,  0.        ,
         0.        ,  0.        ],
       [ 1.18684903, -1.34218285,  0.18664186, ...,  0.        ,
         0.        ,  1.        ],
       ...,
       [ 1.58648943, -0.72478134, -1.56295222, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.78221312, -0.85106801,  0.18664186, ...,  0.        ,
         0.        ,  0.        ],
       [-1.43579109,  0.99645926,  1.85670895, ...,  0.        ,
         1.        ,  0.        ]])
housing_prepared.shape
(16512, 16)
사이킷런이 제공하는 다양한 회귀 모델을 선택하여 훈련과 예측을 실행하는 방법 소개한다. 소개되는 모델은 다음과 같다.
LinearRegression 클래스 활용DecisionTreeRegressor 또한 훈련되는 모델의 성능을 평가하는 교차 검증 방식도 소개한다. 모델 평가 기준은 RMSE(평균 제곱근 오차)를 기본으로 사용한다.
LinearRegression) 훈련¶fit() 메서드를 전처리된 훈련세트와 레이블을 인자로 사용하여 호출LinearRegression 모델은 모델을 학습하는 대신에 무어-펜로즈 역행렬을 이용하여 직접 파라미터를 계산함. (4장 참조)from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(housing_prepared, housing_labels)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# 연습 용도로 훈련 샘플 몇 개를 대상으로 예측 실행
some_data = housing.iloc[:5]
some_labels = housing_labels.iloc[:5]
some_data_prepared = full_pipeline.transform(some_data)
print("예측:", lin_reg.predict(some_data_prepared))
예측: [210644.60459286 317768.80697211 210956.43331178 59218.98886849 189747.55849879]
실제 중간 주택 가격은 다음과 같다.
print("레이블:", list(some_labels))
레이블: [286600.0, 340600.0, 196900.0, 46300.0, 254500.0]
mean_squared_error() 함수는 squared=False로 키워드 인자를 사용하면 바로
RMSE 반환함. 아래 코드에서는 기본값인 squared=True를 사용하기에 직접 제곱근 값을 계산해야 함.from sklearn.metrics import mean_squared_error
housing_predictions = lin_reg.predict(housing_prepared)
lin_mse = mean_squared_error(housing_labels, housing_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse
68628.19819848923
from sklearn.metrics import mean_absolute_error
lin_mae = mean_absolute_error(housing_labels, housing_predictions)
lin_mae
49439.89599001897
RMSE가 MAE 보다 크다. 따라서 훈련 세트에 이상치가 많이 포함되어 있다고 유추할 수 있다. 앞서 살펴 보았듯이, 여러 이상치를 제거하고 훈련시키면 좀 더 좋은 성능이 나올 수 있다. (아래 프로젝트 안내 참조)
훈련 및 예측 과정은 동일하다.
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(housing_prepared, housing_labels)
DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=42, splitter='best')
RMSE가 0으로 측정된다.
housing_predictions = tree_reg.predict(housing_prepared)
tree_mse = mean_squared_error(housing_labels, housing_predictions)
tree_rmse = np.sqrt(tree_mse)
tree_rmse
0.0
cross_val_score 함수를 활용하여 특정 모델의 성능을 교차 검증(cross validation) 기법으로 평가한다. 
사용되는 하이퍼파라미터는 다음과 같다.
cv: 폴드 수. 아래 코드는 10개의 폴드 사용.(cv=10)scoring: 교차 검증에 사용되는 성능 평가 기준을 지정함. 
  단, 여기서는 높을 수록 좋은 효용함수 사용. 
  따라서 아래 코드에서는 작을 수록 좋은 비용함수의 음숫값을 사용하도록 지정함.
  (scoring="neg_mean_squared_error")from sklearn.model_selection import cross_val_score
scores = cross_val_score(tree_reg, housing_prepared, housing_labels,
                         scoring="neg_mean_squared_error", cv=10)
tree_rmse_scores = np.sqrt(-scores)
결정 트리에 대한 교차 검증 결과가 좋지 않다. 즉, 앞선 결과가 완벽한 과대적합이었음이 다시 한 번 확인된다.
def display_scores(scores):
    print("점수:", scores)
    print("평균:", scores.mean())
    print("표준 편차:", scores.std())
display_scores(tree_rmse_scores)
점수: [70194.33680785 66855.16363941 72432.58244769 70758.73896782 71115.88230639 75585.14172901 70262.86139133 70273.6325285 75366.87952553 71231.65726027] 평균: 71407.68766037929 표준 편차: 2439.4345041191004
선형 회귀에 대한 교차 검증 결과가 오히려 좀 더 좋다.
lin_scores = cross_val_score(lin_reg, housing_prepared, housing_labels,
                             scoring="neg_mean_squared_error", cv=10)
lin_rmse_scores = np.sqrt(-lin_scores)
display_scores(lin_rmse_scores)
점수: [66782.73843989 66960.118071 70347.95244419 74739.57052552 68031.13388938 71193.84183426 64969.63056405 68281.61137997 71552.91566558 67665.10082067] 평균: 69052.46136345083 표준 편차: 2731.674001798344
팬다스의 시리즈(Series) 자료형이 제공하는 describe() 메서드를 이용하여 10개 검증 결과의 통계 정보를 확인할 수 있다.
scores = cross_val_score(lin_reg, housing_prepared, housing_labels, scoring="neg_mean_squared_error", cv=10)
pd.Series(np.sqrt(-scores)).describe()
count 10.000000 mean 69052.461363 std 2879.437224 min 64969.630564 25% 67136.363758 50% 68156.372635 75% 70982.369487 max 74739.570526 dtype: float64
RandomForestRegressor 훈련¶무작위로 선택한 특성을 이용하는 결정 트리 여러 개를 훈련 시킨 후 훈련된 모델들의 평균 예측값을 예측값으로 사용하는 모델이며, 아래 코드에서 사용된 하이퍼파라미터는 다음과 같다.
n_estimators=100 기본값 사용: 사용되는 결정 트리 개수 지정from sklearn.ensemble import RandomForestRegressor
forest_reg = RandomForestRegressor(n_estimators=100, random_state=42)
forest_reg.fit(housing_prepared, housing_labels)
RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=0, warm_start=False)
housing_predictions = forest_reg.predict(housing_prepared)
forest_mse = mean_squared_error(housing_labels, housing_predictions)
forest_rmse = np.sqrt(forest_mse)
forest_rmse
18603.515021376355
랜덤 포레스트를 대상으로 교차 검증 진행하면, 선형회귀, 결정 트리 모델보다 성능이 좋게 나온다. 하지만 교차 검증 결과가 바로 위 훈련 결과보자 좋지 않다. 즉, 위 훈련 모델이 과대적합이었다는 것을 반증한다.
from sklearn.model_selection import cross_val_score
forest_scores = cross_val_score(forest_reg, housing_prepared, housing_labels,
                                scoring="neg_mean_squared_error", cv=10)
forest_rmse_scores = np.sqrt(-forest_scores)
display_scores(forest_rmse_scores)
점수: [49519.80364233 47461.9115823 50029.02762854 52325.28068953 49308.39426421 53446.37892622 48634.8036574 47585.73832311 53490.10699751 50021.5852922 ] 평균: 50182.303100336096 표준 편차: 2097.0810550985693
가능성 있는 모델을 몇 개 선정한 다음에는 모델에 사용된 하이퍼파라미터의 최적값을 찾아야 한다. 좋은 파라미터를 탐색하는 세 가지 기법은 다음과 같다.
여기서는 그리드 탐색과 랜덤 탐색을 간단하게 소개하며, 앙상블 학습은 7장에서 자세히 다룬다.
GridSearchCV 객체를 활용하며, 특정 하이퍼파라미터에 대한 후보로 지정된 값들의 모든 조합에 대해 교차 검증 평가를 진행한다.
아래 코드는 랜덤 포레스트 회귀 모델의 하이퍼파라미터를 탐색한다.
from sklearn.model_selection import GridSearchCV
param_grid = [
    # 첫째 경우: 총 12(=3×4)개의 하이퍼파라미터 조합 시도
    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    # 둘째 경우: bootstrap은 False로 하고 총 6(=2×3)개의 조합 시도
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
  ]
forest_reg = RandomForestRegressor(random_state=42)
# 5-겹 교차 검증 시도. 따라서 총 (12+6)*5=90번의 훈련 진행
grid_search = GridSearchCV(forest_reg, param_grid, cv=5,
                           scoring='neg_mean_squared_error',
                           return_train_score=True)
grid_search.fit(housing_prepared, housing_labels)
GridSearchCV(cv=5, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=42,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'max_features': [2, 4, 6, 8],
                          'n_estimators': [3, 10, 30]},
                         {'bootstrap': [False], 'max_features': [2, 3, 4],
                          'n_estimators': [3, 10]}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='neg_mean_squared_error', verbose=0)
최상의 하이퍼파라미터 조합은 best_params_ 속성에 저장된다.
max_features와 n_estimators 두 값 모두 지정된 최댓값이 최적으로 선택됨. 
  따라서 숫자를 좀 더 키워도 되는 것으로 보임.grid_search.best_params_
{'max_features': 8, 'n_estimators': 30}
최상의 랜덤 포레스트 회귀 모델은 best_estimator_ 속성에 저장된다.
grid_search.best_estimator_
RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features=8, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=30, n_jobs=None, oob_score=False,
                      random_state=42, verbose=0, warm_start=False)
그리드 탐색에서 테스트한 18개의 하이퍼파라미터 조합에 대한 평가 점수는 다음과 같다.
cv_results_ 속성에 저장됨cvres = grid_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)
63669.11631261028 {'max_features': 2, 'n_estimators': 3}
55627.099719926795 {'max_features': 2, 'n_estimators': 10}
53384.57275149205 {'max_features': 2, 'n_estimators': 30}
60965.950449450494 {'max_features': 4, 'n_estimators': 3}
52741.04704299915 {'max_features': 4, 'n_estimators': 10}
50377.40461678399 {'max_features': 4, 'n_estimators': 30}
58663.93866579625 {'max_features': 6, 'n_estimators': 3}
52006.19873526564 {'max_features': 6, 'n_estimators': 10}
50146.51167415009 {'max_features': 6, 'n_estimators': 30}
57869.25276169646 {'max_features': 8, 'n_estimators': 3}
51711.127883959234 {'max_features': 8, 'n_estimators': 10}
49682.273345071546 {'max_features': 8, 'n_estimators': 30}
62895.06951262424 {'bootstrap': False, 'max_features': 2, 'n_estimators': 3}
54658.176157539405 {'bootstrap': False, 'max_features': 2, 'n_estimators': 10}
59470.40652318466 {'bootstrap': False, 'max_features': 3, 'n_estimators': 3}
52724.9822587892 {'bootstrap': False, 'max_features': 3, 'n_estimators': 10}
57490.5691951261 {'bootstrap': False, 'max_features': 4, 'n_estimators': 3}
51009.495668875716 {'bootstrap': False, 'max_features': 4, 'n_estimators': 10}
cv_results_ 속성에 그리드 탐색 과정에서 알아낸 많은 값들이 저장되어 있다. 
(넘파이 사전 객체로 저장됨.) 
팬다스 데이터프레임으로 형변환하면 사전의 키(key)가 열(column)의 항목으로 사용된다.
pd.DataFrame(grid_search.cv_results_)
| mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_max_features | param_n_estimators | param_bootstrap | params | split0_test_score | split1_test_score | split2_test_score | split3_test_score | split4_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | split3_train_score | split4_train_score | mean_train_score | std_train_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.076153 | 0.002736 | 0.004459 | 0.000129 | 2 | 3 | NaN | {'max_features': 2, 'n_estimators': 3} | -3.837622e+09 | -4.147108e+09 | -4.196408e+09 | -3.903319e+09 | -4.184325e+09 | -4.053756e+09 | 1.519591e+08 | 18 | -1.064113e+09 | -1.105142e+09 | -1.116550e+09 | -1.112342e+09 | -1.129650e+09 | -1.105559e+09 | 2.220402e+07 | 
| 1 | 0.246062 | 0.002566 | 0.012117 | 0.000167 | 2 | 10 | NaN | {'max_features': 2, 'n_estimators': 10} | -3.047771e+09 | -3.254861e+09 | -3.130196e+09 | -2.865188e+09 | -3.173856e+09 | -3.094374e+09 | 1.327062e+08 | 11 | -5.927175e+08 | -5.870952e+08 | -5.776964e+08 | -5.716332e+08 | -5.802501e+08 | -5.818785e+08 | 7.345821e+06 | 
| 2 | 0.737030 | 0.003205 | 0.035192 | 0.003167 | 2 | 30 | NaN | {'max_features': 2, 'n_estimators': 30} | -2.689185e+09 | -3.021086e+09 | -2.948330e+09 | -2.619995e+09 | -2.970968e+09 | -2.849913e+09 | 1.626875e+08 | 9 | -4.381089e+08 | -4.391272e+08 | -4.371702e+08 | -4.376955e+08 | -4.452654e+08 | -4.394734e+08 | 2.966320e+06 | 
| 3 | 0.122908 | 0.001886 | 0.004670 | 0.000216 | 4 | 3 | NaN | {'max_features': 4, 'n_estimators': 3} | -3.730181e+09 | -3.786886e+09 | -3.734515e+09 | -3.418747e+09 | -3.913907e+09 | -3.716847e+09 | 1.631510e+08 | 16 | -9.865163e+08 | -1.012565e+09 | -9.169425e+08 | -1.037400e+09 | -9.707739e+08 | -9.848396e+08 | 4.084607e+07 | 
| 4 | 0.401961 | 0.001792 | 0.012462 | 0.000907 | 4 | 10 | NaN | {'max_features': 4, 'n_estimators': 10} | -2.666283e+09 | -2.784511e+09 | -2.892276e+09 | -2.616813e+09 | -2.948207e+09 | -2.781618e+09 | 1.268607e+08 | 8 | -5.097115e+08 | -5.162820e+08 | -4.962893e+08 | -5.436192e+08 | -5.160297e+08 | -5.163863e+08 | 1.542862e+07 | 
| 5 | 1.211893 | 0.015249 | 0.033719 | 0.000623 | 4 | 30 | NaN | {'max_features': 4, 'n_estimators': 30} | -2.387153e+09 | -2.588448e+09 | -2.666426e+09 | -2.398071e+09 | -2.649316e+09 | -2.537883e+09 | 1.214614e+08 | 3 | -3.838835e+08 | -3.880268e+08 | -3.790867e+08 | -4.040957e+08 | -3.845520e+08 | -3.879289e+08 | 8.571233e+06 | 
| 6 | 0.165049 | 0.005045 | 0.004534 | 0.000099 | 6 | 3 | NaN | {'max_features': 6, 'n_estimators': 3} | -3.119657e+09 | -3.586319e+09 | -3.592772e+09 | -3.328934e+09 | -3.579607e+09 | -3.441458e+09 | 1.893056e+08 | 14 | -9.245343e+08 | -8.886939e+08 | -9.353135e+08 | -9.009801e+08 | -8.624664e+08 | -9.023976e+08 | 2.591445e+07 | 
| 7 | 0.552298 | 0.004174 | 0.012189 | 0.000104 | 6 | 10 | NaN | {'max_features': 6, 'n_estimators': 10} | -2.549663e+09 | -2.782039e+09 | -2.762720e+09 | -2.521134e+09 | -2.907667e+09 | -2.704645e+09 | 1.471569e+08 | 6 | -4.980344e+08 | -5.045869e+08 | -4.994664e+08 | -4.990325e+08 | -5.055542e+08 | -5.013349e+08 | 3.100456e+06 | 
| 8 | 1.674913 | 0.009280 | 0.033569 | 0.000316 | 6 | 30 | NaN | {'max_features': 6, 'n_estimators': 30} | -2.370010e+09 | -2.583638e+09 | -2.607703e+09 | -2.350953e+09 | -2.661059e+09 | -2.514673e+09 | 1.285080e+08 | 2 | -3.838538e+08 | -3.804711e+08 | -3.805218e+08 | -3.856095e+08 | -3.901917e+08 | -3.841296e+08 | 3.617057e+06 | 
| 9 | 0.210843 | 0.002306 | 0.004537 | 0.000037 | 8 | 3 | NaN | {'max_features': 8, 'n_estimators': 3} | -3.353504e+09 | -3.348552e+09 | -3.402843e+09 | -3.129307e+09 | -3.510047e+09 | -3.348850e+09 | 1.241939e+08 | 13 | -9.228123e+08 | -8.553031e+08 | -8.603321e+08 | -8.881964e+08 | -9.151287e+08 | -8.883545e+08 | 2.750227e+07 | 
| 10 | 0.710508 | 0.003138 | 0.012237 | 0.000172 | 8 | 10 | NaN | {'max_features': 8, 'n_estimators': 10} | -2.571970e+09 | -2.718994e+09 | -2.842317e+09 | -2.460258e+09 | -2.776666e+09 | -2.674041e+09 | 1.392777e+08 | 5 | -4.932416e+08 | -4.815238e+08 | -4.730979e+08 | -5.155367e+08 | -4.985555e+08 | -4.923911e+08 | 1.459294e+07 | 
| 11 | 2.147798 | 0.007318 | 0.033414 | 0.000381 | 8 | 30 | NaN | {'max_features': 8, 'n_estimators': 30} | -2.357390e+09 | -2.546640e+09 | -2.591972e+09 | -2.318617e+09 | -2.527022e+09 | -2.468328e+09 | 1.091662e+08 | 1 | -3.841658e+08 | -3.744500e+08 | -3.773239e+08 | -3.882250e+08 | -3.810005e+08 | -3.810330e+08 | 4.871017e+06 | 
| 12 | 0.117525 | 0.001655 | 0.005266 | 0.000084 | 2 | 3 | False | {'bootstrap': False, 'max_features': 2, 'n_est... | -3.785816e+09 | -4.166012e+09 | -4.061751e+09 | -3.675704e+09 | -4.089667e+09 | -3.955790e+09 | 1.900964e+08 | 17 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 
| 13 | 0.389384 | 0.004010 | 0.014504 | 0.000106 | 2 | 10 | False | {'bootstrap': False, 'max_features': 2, 'n_est... | -2.810721e+09 | -3.107789e+09 | -3.131187e+09 | -2.788537e+09 | -3.099347e+09 | -2.987516e+09 | 1.539234e+08 | 10 | -6.056477e-02 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -2.967449e+00 | -6.056027e-01 | 1.181156e+00 | 
| 14 | 0.156460 | 0.003447 | 0.005427 | 0.000130 | 3 | 3 | False | {'bootstrap': False, 'max_features': 3, 'n_est... | -3.618324e+09 | -3.441527e+09 | -3.554815e+09 | -3.619116e+09 | -3.449864e+09 | -3.536729e+09 | 7.795057e+07 | 15 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -6.072840e+01 | -1.214568e+01 | 2.429136e+01 | 
| 15 | 0.516370 | 0.009165 | 0.014551 | 0.000155 | 3 | 10 | False | {'bootstrap': False, 'max_features': 3, 'n_est... | -2.757999e+09 | -2.851737e+09 | -2.830927e+09 | -2.672765e+09 | -2.786190e+09 | -2.779924e+09 | 6.286720e+07 | 7 | -2.089484e+01 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -5.465556e+00 | -5.272080e+00 | 8.093117e+00 | 
| 16 | 0.193802 | 0.003177 | 0.005358 | 0.000139 | 4 | 3 | False | {'bootstrap': False, 'max_features': 4, 'n_est... | -3.134040e+09 | -3.559375e+09 | -3.440422e+09 | -3.053647e+09 | -3.338344e+09 | -3.305166e+09 | 1.879165e+08 | 12 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 
| 17 | 0.643161 | 0.006134 | 0.014409 | 0.000078 | 4 | 10 | False | {'bootstrap': False, 'max_features': 4, 'n_est... | -2.525578e+09 | -2.710011e+09 | -2.609100e+09 | -2.439607e+09 | -2.725548e+09 | -2.601969e+09 | 1.088048e+08 | 4 | -0.000000e+00 | -1.514119e-02 | -0.000000e+00 | -0.000000e+00 | -0.000000e+00 | -3.028238e-03 | 6.056477e-03 | 
데이터 전처리와 그리드 탐색을 연결한 파이프라인을 이용하면 전처리 단계에서 설정해야 하는 값들을 일종의 하이퍼파라미터로 다룰 수 있다. 예를 들어,
CombinedAttributesAdder 클래스의 객체를 생성할 때 지정하는 add_bedrooms_per_room 옵션 변수 값 지정하기등에 대해 어떻게 설정하는 것이 좋은지를 하이퍼파라미터로 지정해 놓으면 그리드 탐색 등을 이용하여 어떤 옵션이 좋은지 함께 탐색해준다.
조금은 고급 기술이지만, 지금까지 배운 내용을 이해한다면 어렵지 않게 적용할 수 있는 기술이다. 파이프라인과 그리드 탐색을 연동한 예제들을 아래 사이트에서 살펴볼 수 있다.
조합 대상인 하이퍼파라이미터 값을 지정된 구간에서 임의의 선택하는 방식을 활용한다. 탐색 공간이 커질 때 사용하며, 기타 작동 방식은 그리드 탐색과 동일하다. 아래 코드는 다음 두 하이퍼파라미터의 탐색 구간을 지정한다.
n_iter=10: 10개의 조합을 임의로 선정cv=5: 각각의 조합에 대해 5겹 교차 검증 실행. 즉, 총 50번 훈련 이루어짐.from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
param_distribs = {
        'n_estimators': randint(low=1, high=200),
        'max_features': randint(low=1, high=8),
    }
forest_reg = RandomForestRegressor(random_state=42)
rnd_search = RandomizedSearchCV(forest_reg, param_distributions=param_distribs,
                                n_iter=10, cv=5, scoring='neg_mean_squared_error', random_state=42)
rnd_search.fit(housing_prepared, housing_labels)
RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=RandomForestRegressor(bootstrap=True,
                                                   ccp_alpha=0.0,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features='auto',
                                                   max_leaf_nodes=None,
                                                   max_samples=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   n_estimators=100,
                                                   n_jobs=None, oob_score=Fals...
                                                   warm_start=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f1c1df88bd0>,
                                        'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f1c1df88c10>},
                   pre_dispatch='2*n_jobs', random_state=42, refit=True,
                   return_train_score=False, scoring='neg_mean_squared_error',
                   verbose=0)
cv_results_ 속성에 저장된 10개 모델의 성능과 사용된 하이퍼파라미터는 다음과 같음.cvres = rnd_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)
49150.70756927707 {'max_features': 7, 'n_estimators': 180}
51389.889203389284 {'max_features': 5, 'n_estimators': 15}
50796.155224308866 {'max_features': 3, 'n_estimators': 72}
50835.13360315349 {'max_features': 5, 'n_estimators': 21}
49280.9449827171 {'max_features': 7, 'n_estimators': 122}
50774.90662363929 {'max_features': 3, 'n_estimators': 75}
50682.78888164288 {'max_features': 3, 'n_estimators': 88}
49608.99608105296 {'max_features': 5, 'n_estimators': 100}
50473.61930350219 {'max_features': 3, 'n_estimators': 150}
64429.84143294435 {'max_features': 5, 'n_estimators': 2}
최상의 랜덤 포레스트 모델에서 사용된 특성들의 중요도를 확인하여 일부 특성을 제외할 수 있다.
feature_importances_ 속성 활용feature_importances = grid_search.best_estimator_.feature_importances_
feature_importances
array([7.33442355e-02, 6.29090705e-02, 4.11437985e-02, 1.46726854e-02,
       1.41064835e-02, 1.48742809e-02, 1.42575993e-02, 3.66158981e-01,
       5.64191792e-02, 1.08792957e-01, 5.33510773e-02, 1.03114883e-02,
       1.64780994e-01, 6.02803867e-05, 1.96041560e-03, 2.85647464e-03])
중요도 기준으로 정렬하여 특성과 함께 보여주면 다음과 같다.
extra_attribs = ["rooms_per_hhold", "pop_per_hhold", "bedrooms_per_room"]
# 범주형 특성에 사용된 5개의 범주 이름을 다시 가져오기
cat_encoder = full_pipeline.named_transformers_["cat"]
cat_one_hot_attribs = list(cat_encoder.categories_[0])
attributes = num_attribs + extra_attribs + cat_one_hot_attribs
# 특성 중요도와 특성 이름을 쌍으로 한 다음에 중요도 기준 내림차순으로 정렬.
sorted(zip(feature_importances, attributes), reverse=True)
[(0.36615898061813423, 'median_income'), (0.16478099356159054, 'INLAND'), (0.10879295677551575, 'pop_per_hhold'), (0.07334423551601243, 'longitude'), (0.06290907048262032, 'latitude'), (0.056419179181954014, 'rooms_per_hhold'), (0.053351077347675815, 'bedrooms_per_room'), (0.04114379847872964, 'housing_median_age'), (0.014874280890402769, 'population'), (0.014672685420543239, 'total_rooms'), (0.014257599323407808, 'households'), (0.014106483453584104, 'total_bedrooms'), (0.010311488326303788, '<1H OCEAN'), (0.0028564746373201584, 'NEAR OCEAN'), (0.0019604155994780706, 'NEAR BAY'), (6.0280386727366e-05, 'ISLAND')]
튜닝된 모델을 테스트 세트 데이터를 이용하여 성능을 최종적으로 평가한다. 아래 코드에서는 그리드 탐색으로 찾은 최적 모델을 활용한다. 최종 성능 편가는 지금까지 전혀 사용하지 않았던 테스트 세트를 훈련 세트와 동일하게 전처리 한 후에 성능 평가를 진행해야 한다.
final_model = grid_search.best_estimator_
# 테스트 세트의 레이블(타깃) 분류
X_test = strat_test_set.drop("median_house_value", axis=1)
y_test = strat_test_set["median_house_value"].copy()
# 훈련 특성 전처리
# 주의사항: fit() 메서드는 사용하지 않음.
X_test_prepared = full_pipeline.transform(X_test)
# 예측하기
final_predictions = final_model.predict(X_test_prepared)
# RMSE 평가
final_mse = mean_squared_error(y_test, final_predictions)
final_rmse = np.sqrt(final_mse)
테스트 세트를 대상으로 한 최종 성능(RMSE)은 아래와 같다.
final_rmse
47730.22690385927
얻어진 테스트 RMSE에 대한 95% 신뢰 구간을 계산하여 확률적으로 시스템의 성능을 예측할 수 있다.
scipy의 stats 모듈 활용from scipy import stats
confidence = 0.95
squared_errors = (final_predictions - y_test) ** 2
np.sqrt(stats.t.interval(confidence, len(squared_errors) - 1,
                         loc=squared_errors.mean(),
                         scale=stats.sem(squared_errors)))
array([45685.10470776, 49691.25001878])
z-분포를 이용한 신뢰구간은 다음과 같다.
zscore = stats.norm.ppf((1 + confidence) / 2)
zmargin = zscore * squared_errors.std(ddof=1) / np.sqrt(m)
np.sqrt(mean - zmargin), np.sqrt(mean + zmargin)
(45685.717918136455, 49690.68623889413)
full_pipeline_with_predictor = Pipeline([
        ("preparation", full_pipeline),
        ("linear", LinearRegression())
    ])
full_pipeline_with_predictor.fit(housing, housing_labels)
full_pipeline_with_predictor.predict(some_data)
array([210644.60459286, 317768.80697211, 210956.43331178,  59218.98886849,
       189747.55849879])
my_model = full_pipeline_with_predictor
import joblib
joblib.dump(my_model, "my_model.pkl") # DIFF
#...
my_model_loaded = joblib.load("my_model.pkl") # DIFF
RandomizedSearchCV를 위한 Scipy 분포 함수¶랜덤 탐색을 하려면 특정 옵션 변수갈들을 무작위로 선택해주는 확률분포함수를 지정해야 한다. 앞서 랜덤 탐색을 이용하여 캘리포니아 인구조사 데이터셋 분석을 위한 최상의 랜덤 포레스트 모델을 찾았을 때 사용한 옵션 변수와 확률분포는 다음과 같다.
'n_estimators': randint(low=1, high=200)'max_features': randint(low=1, high=8)기타 확률분포는 Scipy 패키지의 stats 모듈에 정의되어 있다.
아래는 그 중에서 기하분포 geom, 지수분포 expon,
균등분포 uniform, 정규분포 norm 등을 이용하여
무작위로 생성된 샘플들의 분포를 히스토그램으로 보여준다.
주의: 히스토그램의 y축은 도수를 가리킨다. 이것을 10,000으로 나눈 값으로 대체하면 히스토그램에서 해당 확률분포의 그래프가 그려지게 된다. 즉, 확률밀도함수의 그래프로 감싸인 영역이 표시된다. 자세한 사항은 더이상 다루지 않는다. 보다 자세한 사항은 사이파이를 이용한 확률분포 분석를 참조할 수 있다.
import matplotlib.pyplot as plt
from scipy.stats import geom, expon, norm, uniform
# 기하분포
geom_distrib=geom(0.5).rvs(10000, random_state=42)
# 지수분포
expon_distrib=expon().rvs(10000, random_state=42)
# 균등분포
uniform_distrib=uniform().rvs(10000)
# 정규분포
norm_distrib=norm().rvs(size=10000, random_state=42)
# 발생할 확률이 0.5인 사건이 몇 번 시도하면 발생하는가를 10,000번 실험한 결과
plt.hist(geom_distrib, bins=50)
plt.show()
# 특정 사건이 발생할 때까지 걸리는 시간을 10,000번 실험한 결과
plt.hist(expon_distrib, bins=50)
plt.show()
# 0과 1사의 실수를 임의로, 하지만 균등하게 10,000번 선택한 결과
plt.hist(uniform_distrib, bins=50)
plt.show()
# 무작위로 10,000개의 숫자를 선택한 결과. 단, 표준정규분포를 따라야 함. 
plt.hist(norm_distrib, bins=50)
plt.show()
아래의 세 가지 변환을 전처리 과정에 추가하면 훈련된 모델의 성능이 어떻게 얼마나 달라지는지 이전 모델과 비교하라.
변환 1
중간 소득과 중간 주택 가격 사이의 상관관계 그래프에서 확인할 수 있는 수평선에 위치한 데이터를 삭제한다.
변환 2
회귀 모델 훈련에 사용되는 12개의 특성 중에 세 개는 기존 9개의 특성을 조합하여 생성하였다. 12개의 특성 중에 중간 주택 가격과의 상관계수의 절댓값이 0.2 보다 작은 특성을 삭제한다.
주의사항: 특성 삭제는 훈련 세트 뿐만 아니라 테스트 세트에 대해서도 진행해야 한다. 특성이 다르면 동일한 모델을 적용할 수 없다.
변환 3
범주형 특성을 제외한 9개 특성별 히스토그램을 보면 일부 특성의 히스토그램이 좌우 비대칭이다. (전문 용어로 왜도(skewness)가 0이 아닐 때 이런 분포가 발생한다.) 대표적으로 방의 총 개수(total_rooms), 침실 총 개수(total_bedrooms), 인구(population), 가구수(households), 중간소득(median_income) 등 다섯 개의 특성이 그렇다. 앞서 언급된 5개 특성 또는 일부에 대해 로그 변환을 적용한다.
In [465], 즉 465번째 코드 셀(Testing if fixing Skewed Features Help) 참조로그 변환(log transformation)은 수치 데이터에 로그(log) 함수를 적용하는 변환이며, 로그 변환이 이루어진 데이터의 분포는 보다 정규 분포에 가까워진다. 
아래 그림은 왜도가 0이 아닌 분포(상단)에 로그 변환을 가했을 때 생성된 분포(하단)의 왜도가 훨씬 약화되어 정규분포에 가까워지는 것을 보여준다. 

<그림 출처: StackExchange>
질문: 서포트 벡터 머신 회귀(sklearn.svm.SVR)를 kernel=“linear”(하이퍼파라미터 C를 바꿔가며)나 kernel=“rbf”(하이퍼파라미터 C와 gamma를 바꿔가며) 등의 다양한 하이퍼파라미터 설정으로 시도해보세요. 지금은 이 하이퍼파라미터가 무엇을 의미하는지 너무 신경 쓰지 마세요. 최상의 SVR 모델은 무엇인가요?
경고: 사용하는 하드웨어에 따라 다음 셀을 실행하는데 30분 또는 그 이상 걸릴 수 있습니다.
from sklearn.model_selection import GridSearchCV
param_grid = [
        {'kernel': ['linear'], 'C': [10., 30., 100., 300., 1000., 3000., 10000., 30000.0]},
        {'kernel': ['rbf'], 'C': [1.0, 3.0, 10., 30., 100., 300., 1000.0],
         'gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0]},
    ]
svm_reg = SVR()
grid_search = GridSearchCV(svm_reg, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
grid_search.fit(housing_prepared, housing_labels)
Fitting 5 folds for each of 50 candidates, totalling 250 fits [CV] C=10.0, kernel=linear ...........................................
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] ............................ C=10.0, kernel=linear, total= 9.8s [CV] C=10.0, kernel=linear ...........................................
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.8s remaining: 0.0s
[CV] ............................ C=10.0, kernel=linear, total= 9.8s [CV] C=10.0, kernel=linear ........................................... [CV] ............................ C=10.0, kernel=linear, total= 9.8s [CV] C=10.0, kernel=linear ........................................... [CV] ............................ C=10.0, kernel=linear, total= 10.7s [CV] C=10.0, kernel=linear ........................................... [CV] ............................ C=10.0, kernel=linear, total= 9.8s [CV] C=30.0, kernel=linear ........................................... [CV] ............................ C=30.0, kernel=linear, total= 9.6s [CV] C=30.0, kernel=linear ........................................... [CV] ............................ C=30.0, kernel=linear, total= 9.6s [CV] C=30.0, kernel=linear ........................................... [CV] ............................ C=30.0, kernel=linear, total= 10.0s [CV] C=30.0, kernel=linear ........................................... [CV] ............................ C=30.0, kernel=linear, total= 9.8s [CV] C=30.0, kernel=linear ........................................... [CV] ............................ C=30.0, kernel=linear, total= 9.6s [CV] C=100.0, kernel=linear .......................................... [CV] ........................... C=100.0, kernel=linear, total= 9.7s [CV] C=100.0, kernel=linear .......................................... [CV] ........................... C=100.0, kernel=linear, total= 9.5s [CV] C=100.0, kernel=linear .......................................... [CV] ........................... C=100.0, kernel=linear, total= 9.7s [CV] C=100.0, kernel=linear .......................................... [CV] ........................... C=100.0, kernel=linear, total= 9.6s [CV] C=100.0, kernel=linear .......................................... [CV] ........................... C=100.0, kernel=linear, total= 9.5s [CV] C=300.0, kernel=linear .......................................... [CV] ........................... C=300.0, kernel=linear, total= 9.7s [CV] C=300.0, kernel=linear .......................................... [CV] ........................... C=300.0, kernel=linear, total= 9.7s [CV] C=300.0, kernel=linear .......................................... [CV] ........................... C=300.0, kernel=linear, total= 9.8s [CV] C=300.0, kernel=linear .......................................... [CV] ........................... C=300.0, kernel=linear, total= 9.8s [CV] C=300.0, kernel=linear .......................................... [CV] ........................... C=300.0, kernel=linear, total= 9.7s [CV] C=1000.0, kernel=linear ......................................... [CV] .......................... C=1000.0, kernel=linear, total= 10.0s [CV] C=1000.0, kernel=linear ......................................... [CV] .......................... C=1000.0, kernel=linear, total= 10.2s [CV] C=1000.0, kernel=linear ......................................... [CV] .......................... C=1000.0, kernel=linear, total= 10.1s [CV] C=1000.0, kernel=linear ......................................... [CV] .......................... C=1000.0, kernel=linear, total= 10.2s [CV] C=1000.0, kernel=linear ......................................... [CV] .......................... C=1000.0, kernel=linear, total= 9.9s [CV] C=3000.0, kernel=linear ......................................... [CV] .......................... C=3000.0, kernel=linear, total= 10.9s [CV] C=3000.0, kernel=linear ......................................... [CV] .......................... C=3000.0, kernel=linear, total= 10.8s [CV] C=3000.0, kernel=linear ......................................... [CV] .......................... C=3000.0, kernel=linear, total= 11.1s [CV] C=3000.0, kernel=linear ......................................... [CV] .......................... C=3000.0, kernel=linear, total= 11.1s [CV] C=3000.0, kernel=linear ......................................... [CV] .......................... C=3000.0, kernel=linear, total= 10.7s [CV] C=10000.0, kernel=linear ........................................ [CV] ......................... C=10000.0, kernel=linear, total= 14.7s [CV] C=10000.0, kernel=linear ........................................ [CV] ......................... C=10000.0, kernel=linear, total= 14.9s [CV] C=10000.0, kernel=linear ........................................ [CV] ......................... C=10000.0, kernel=linear, total= 15.1s [CV] C=10000.0, kernel=linear ........................................ [CV] ......................... C=10000.0, kernel=linear, total= 13.9s [CV] C=10000.0, kernel=linear ........................................ [CV] ......................... C=10000.0, kernel=linear, total= 13.4s [CV] C=30000.0, kernel=linear ........................................ [CV] ......................... C=30000.0, kernel=linear, total= 23.8s [CV] C=30000.0, kernel=linear ........................................ [CV] ......................... C=30000.0, kernel=linear, total= 24.4s [CV] C=30000.0, kernel=linear ........................................ [CV] ......................... C=30000.0, kernel=linear, total= 25.6s [CV] C=30000.0, kernel=linear ........................................ [CV] ......................... C=30000.0, kernel=linear, total= 24.6s [CV] C=30000.0, kernel=linear ........................................ [CV] ......................... C=30000.0, kernel=linear, total= 21.7s [CV] C=1.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=1.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=1.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=1.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=1.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=1.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=1.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=1.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=1.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=1.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=1.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=1.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=1.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.1, kernel=rbf, total= 16.7s [CV] C=1.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.1, kernel=rbf, total= 16.7s [CV] C=1.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=1.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=1.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=1.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=1.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=1.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=1.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=1.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=1.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=1.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=1.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=3.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.01, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.03, kernel=rbf, total= 16.8s [CV] C=3.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.03, kernel=rbf, total= 16.8s [CV] C=3.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.03, kernel=rbf ................................... [CV] .................... C=3.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=3.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=3.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.1, kernel=rbf, total= 16.7s [CV] C=3.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=3.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=3.0, gamma=0.1, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.1, kernel=rbf, total= 16.7s [CV] C=3.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=0.3, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=3.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=3.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=3.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=3.0, gamma=1.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=3.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=3.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=3.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=3.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=3.0, gamma=3.0, kernel=rbf .................................... [CV] ..................... C=3.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=10.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=10.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=10.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=10.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=10.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=10.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=10.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=10.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=10.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.03, kernel=rbf, total= 16.8s [CV] C=10.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=10.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=10.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=10.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=10.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=10.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=10.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.1, kernel=rbf, total= 16.6s [CV] C=10.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=10.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=10.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=10.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=10.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=10.0, gamma=0.3, kernel=rbf, total= 16.1s [CV] C=10.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=10.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=10.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=10.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=10.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=10.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=10.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=10.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=10.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=3.0, kernel=rbf, total= 16.3s [CV] C=10.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=10.0, gamma=3.0, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.01, kernel=rbf, total= 17.1s [CV] C=30.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.01, kernel=rbf, total= 17.1s [CV] C=30.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=30.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=30.0, gamma=0.01, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.01, kernel=rbf, total= 17.0s [CV] C=30.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=30.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=30.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.03, kernel=rbf, total= 16.9s [CV] C=30.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.03, kernel=rbf, total= 17.0s [CV] C=30.0, gamma=0.03, kernel=rbf .................................. [CV] ................... C=30.0, gamma=0.03, kernel=rbf, total= 17.1s [CV] C=30.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.1, kernel=rbf, total= 16.8s [CV] C=30.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.1, kernel=rbf, total= 16.8s [CV] C=30.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.1, kernel=rbf, total= 16.9s [CV] C=30.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.1, kernel=rbf, total= 16.9s [CV] C=30.0, gamma=0.1, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.1, kernel=rbf, total= 17.0s [CV] C=30.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.3, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.3, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.3, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.3, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=0.3, kernel=rbf ................................... [CV] .................... C=30.0, gamma=0.3, kernel=rbf, total= 16.3s [CV] C=30.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=1.0, kernel=rbf, total= 15.6s [CV] C=30.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=1.0, kernel=rbf, total= 15.6s [CV] C=30.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=30.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=1.0, kernel=rbf, total= 15.5s [CV] C=30.0, gamma=1.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=30.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=30.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=30.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=30.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=30.0, gamma=3.0, kernel=rbf ................................... [CV] .................... C=30.0, gamma=3.0, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=100.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=100.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.01, kernel=rbf, total= 16.8s [CV] C=100.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=100.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.01, kernel=rbf, total= 16.9s [CV] C=100.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.03, kernel=rbf, total= 16.6s [CV] C=100.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.03, kernel=rbf, total= 16.6s [CV] C=100.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.03, kernel=rbf, total= 16.6s [CV] C=100.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.03, kernel=rbf, total= 16.6s [CV] C=100.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=100.0, gamma=0.03, kernel=rbf, total= 16.6s [CV] C=100.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.1, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.1, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.1, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.1, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.1, kernel=rbf, total= 16.2s [CV] C=100.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.3, kernel=rbf, total= 15.9s [CV] C=100.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.3, kernel=rbf, total= 15.9s [CV] C=100.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.3, kernel=rbf, total= 15.8s [CV] C=100.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.3, kernel=rbf, total= 15.8s [CV] C=100.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=100.0, gamma=0.3, kernel=rbf, total= 15.9s [CV] C=100.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=100.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=100.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=100.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=100.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=1.0, kernel=rbf, total= 15.4s [CV] C=100.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=100.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=100.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=100.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=100.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=100.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.01, kernel=rbf, total= 16.5s [CV] C=300.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.01, kernel=rbf, total= 16.5s [CV] C=300.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.01, kernel=rbf, total= 16.5s [CV] C=300.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.01, kernel=rbf, total= 16.5s [CV] C=300.0, gamma=0.01, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.01, kernel=rbf, total= 16.6s [CV] C=300.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.03, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.03, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.03, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.03, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.03, kernel=rbf ................................. [CV] .................. C=300.0, gamma=0.03, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.1, kernel=rbf, total= 15.8s [CV] C=300.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.1, kernel=rbf, total= 15.9s [CV] C=300.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.1, kernel=rbf, total= 15.8s [CV] C=300.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.1, kernel=rbf, total= 15.8s [CV] C=300.0, gamma=0.1, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.1, kernel=rbf, total= 15.8s [CV] C=300.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.3, kernel=rbf, total= 15.7s [CV] C=300.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.3, kernel=rbf, total= 15.6s [CV] C=300.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.3, kernel=rbf, total= 15.6s [CV] C=300.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.3, kernel=rbf, total= 15.6s [CV] C=300.0, gamma=0.3, kernel=rbf .................................. [CV] ................... C=300.0, gamma=0.3, kernel=rbf, total= 15.6s [CV] C=300.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=300.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=300.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=300.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=300.0, gamma=1.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=300.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=3.0, kernel=rbf, total= 16.0s [CV] C=300.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=300.0, gamma=3.0, kernel=rbf .................................. [CV] ................... C=300.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1000.0, gamma=0.01, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.01, kernel=rbf, total= 15.9s [CV] C=1000.0, gamma=0.01, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.01, kernel=rbf, total= 16.0s [CV] C=1000.0, gamma=0.01, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.01, kernel=rbf, total= 16.0s [CV] C=1000.0, gamma=0.01, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.01, kernel=rbf, total= 15.9s [CV] C=1000.0, gamma=0.01, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.01, kernel=rbf, total= 15.9s [CV] C=1000.0, gamma=0.03, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.03, kernel=rbf, total= 15.7s [CV] C=1000.0, gamma=0.03, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.03, kernel=rbf, total= 15.7s [CV] C=1000.0, gamma=0.03, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.03, kernel=rbf, total= 15.7s [CV] C=1000.0, gamma=0.03, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.03, kernel=rbf, total= 15.7s [CV] C=1000.0, gamma=0.03, kernel=rbf ................................ [CV] ................. C=1000.0, gamma=0.03, kernel=rbf, total= 15.7s [CV] C=1000.0, gamma=0.1, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.1, kernel=rbf, total= 15.5s [CV] C=1000.0, gamma=0.1, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.1, kernel=rbf, total= 15.5s [CV] C=1000.0, gamma=0.1, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.1, kernel=rbf, total= 15.6s [CV] C=1000.0, gamma=0.1, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.1, kernel=rbf, total= 15.5s [CV] C=1000.0, gamma=0.1, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.1, kernel=rbf, total= 15.5s [CV] C=1000.0, gamma=0.3, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.3, kernel=rbf, total= 15.4s [CV] C=1000.0, gamma=0.3, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.3, kernel=rbf, total= 15.4s [CV] C=1000.0, gamma=0.3, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.3, kernel=rbf, total= 15.4s [CV] C=1000.0, gamma=0.3, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.3, kernel=rbf, total= 15.4s [CV] C=1000.0, gamma=0.3, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=0.3, kernel=rbf, total= 15.4s [CV] C=1000.0, gamma=1.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=1000.0, gamma=1.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=1000.0, gamma=1.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=1000.0, gamma=1.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=1.0, kernel=rbf, total= 15.3s [CV] C=1000.0, gamma=1.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=1.0, kernel=rbf, total= 15.2s [CV] C=1000.0, gamma=3.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1000.0, gamma=3.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1000.0, gamma=3.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1000.0, gamma=3.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=3.0, kernel=rbf, total= 16.1s [CV] C=1000.0, gamma=3.0, kernel=rbf ................................. [CV] .................. C=1000.0, gamma=3.0, kernel=rbf, total= 16.1s
[Parallel(n_jobs=1)]: Done 250 out of 250 | elapsed: 64.9min finished
GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'C': [10.0, 30.0, 100.0, 300.0, 1000.0, 3000.0,
                                10000.0, 30000.0],
                          'kernel': ['linear']},
                         {'C': [1.0, 3.0, 10.0, 30.0, 100.0, 300.0, 1000.0],
                          'gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)
최상 모델의 (5-폴드 교차 검증으로 평가한) 점수는 다음과 같습니다:
negative_mse = grid_search.best_score_
rmse = np.sqrt(-negative_mse)
rmse
70363.84006944533
RandomForestRegressor보다 훨씬 좋지 않네요. 최상의 하이퍼파라미터를 확인해 보겠습니다:
grid_search.best_params_
{'C': 30000.0, 'kernel': 'linear'}
선형 커널이 RBF 커널보다 성능이 나은 것 같습니다. C는 테스트한 것 중에 최대값이 선택되었습니다. 따라서 (작은 값들은 지우고) 더 큰 값의 C로 그리드서치를 다시 실행해 보아야 합니다. 아마도 더 큰 값의 C에서 성능이 높아질 것입니다.
질문: GridSearchCV를 RandomizedSearchCV로 바꿔보세요.
경고: 사용하는 하드웨어에 따라 다음 셀을 실행하는데 45분 또는 그 이상 걸릴 수 있습니다.
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import expon, reciprocal
# expon(), reciprocal()와 그외 다른 확률 분포 함수에 대해서는
# https://docs.scipy.org/doc/scipy/reference/stats.html를 참고하세요.
# 노트: kernel 매개변수가 "linear"일 때는 gamma가 무시됩니다.
param_distribs = {
        'kernel': ['linear', 'rbf'],
        'C': reciprocal(20, 200000),
        'gamma': expon(scale=1.0),
    }
svm_reg = SVR()
rnd_search = RandomizedSearchCV(svm_reg, param_distributions=param_distribs,
                                n_iter=50, cv=5, scoring='neg_mean_squared_error',
                                verbose=2, random_state=42)
rnd_search.fit(housing_prepared, housing_labels)
Fitting 5 folds for each of 50 candidates, totalling 250 fits [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear ......
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear, total= 9.7s [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear ......
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 9.7s remaining: 0.0s
[CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear, total= 9.9s [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear ...... [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear, total= 10.0s [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear ...... [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear, total= 9.9s [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear ...... [CV] C=629.782329591372, gamma=3.010121430917521, kernel=linear, total= 9.9s [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf ...... [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf, total= 18.9s [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf ...... [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf, total= 19.4s [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf ...... [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf, total= 19.2s [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf ...... [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf, total= 19.4s [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf ...... [CV] C=26290.206464300216, gamma=0.9084469696321253, kernel=rbf, total= 19.7s [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf ..... [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf, total= 16.4s [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf ..... [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf, total= 16.4s [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf ..... [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf, total= 16.4s [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf ..... [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf, total= 16.4s [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf ..... [CV] C=84.14107900575871, gamma=0.059838768608680676, kernel=rbf, total= 16.4s [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear .. [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear, total= 9.6s [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear .. [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear, total= 9.6s [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear .. [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear, total= 9.8s [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear .. [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear, total= 9.8s [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear .. [CV] C=432.37884813148855, gamma=0.15416196746656105, kernel=linear, total= 9.8s [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf ........ [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf, total= 16.5s [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf ........ [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf, total= 16.5s [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf ........ [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf, total= 16.6s [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf ........ [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf, total= 16.5s [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf ........ [CV] C=24.17508294611391, gamma=3.503557475158312, kernel=rbf, total= 16.5s [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf ... [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf, total= 15.8s [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf ... [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf, total= 15.8s [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf ... [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf, total= 15.9s [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf ... [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf, total= 15.9s [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf ... [CV] C=113564.03940586245, gamma=0.0007790692366582295, kernel=rbf, total= 15.9s [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf ...... [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf, total= 15.7s [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf ...... [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf, total= 15.7s [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf ...... [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf, total= 15.7s [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf ...... [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf, total= 15.7s [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf ...... [CV] C=108.30488238805073, gamma=0.3627537294604771, kernel=rbf, total= 15.7s [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear . [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear, total= 9.6s [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear . [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear, total= 9.7s [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear . [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear, total= 9.7s [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear . [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear, total= 9.6s [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear . [CV] C=21.344953672647435, gamma=0.023332523598323388, kernel=linear, total= 9.7s [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf ...... [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf, total= 15.4s [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf ...... [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf, total= 15.4s [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf ...... [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf, total= 15.4s [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf ...... [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf, total= 15.4s [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf ...... [CV] C=5603.270317432516, gamma=0.15023452872733867, kernel=rbf, total= 15.4s [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf ..... [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf, total= 38.1s [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf ..... [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf, total= 39.9s [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf ..... [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf, total= 45.9s [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf ..... [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf, total= 37.1s [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf ..... [CV] C=157055.10989448498, gamma=0.26497040005002437, kernel=rbf, total= 41.2s [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear ... [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear, total= 22.1s [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear ... [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear, total= 23.0s [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear ... [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear, total= 24.5s [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear ... [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear, total= 22.0s [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear ... [CV] C=27652.464358739708, gamma=0.2227358621286903, kernel=linear, total= 19.5s [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear .... [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear, total= 1.6min [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear .... [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear, total= 1.2min [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear .... [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear, total= 1.6min [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear .... [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear, total= 1.4min [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear .... [CV] C=171377.39570378003, gamma=0.628789100540856, kernel=linear, total= 1.1min [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear ... [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear, total= 12.1s [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear ... [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear, total= 12.2s [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear ... [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear, total= 12.5s [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear ... [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear, total= 12.0s [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear ... [CV] C=5385.293820172355, gamma=0.18696125197741642, kernel=linear, total= 12.2s [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf ........ [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf, total= 16.0s [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf ........ [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf, total= 16.1s [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf ........ [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf, total= 16.0s [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf ........ [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf, total= 16.0s [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf ........ [CV] C=22.59903216621323, gamma=2.850796878935603, kernel=rbf, total= 16.1s [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear .... [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear, total= 26.3s [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear .... [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear, total= 25.7s [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear .... [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear, total= 27.3s [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear .... [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear, total= 26.2s [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear .... [CV] C=34246.75194632794, gamma=0.3632878599687583, kernel=linear, total= 23.4s [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf ....... [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf, total= 15.8s [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf ....... [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf, total= 15.8s [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf ....... [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf, total= 15.8s [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf ....... [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf, total= 15.8s [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf ....... [CV] C=167.7278956080511, gamma=0.2757870542258224, kernel=rbf, total= 15.8s [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear .... [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear, total= 9.7s [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear .... [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear, total= 9.6s [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear .... [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear, total= 9.7s [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear .... [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear, total= 9.7s [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear .... [CV] C=61.54360542501371, gamma=0.6835472281341501, kernel=linear, total= 9.4s [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf ....... [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf, total= 15.7s [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf ....... [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf, total= 15.7s [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf ....... [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf, total= 15.6s [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf ....... [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf, total= 15.7s [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf ....... [CV] C=98.73897389920914, gamma=0.4960365360493639, kernel=rbf, total= 15.6s [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf ...... [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf, total= 15.5s [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf ...... [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf, total= 15.6s [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf ...... [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf, total= 15.6s [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf ...... [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf, total= 15.6s [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf ...... [CV] C=8935.505635947808, gamma=0.37354658165762367, kernel=rbf, total= 15.6s [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear .... [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear, total= 9.6s [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear .... [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear, total= 9.6s [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear .... [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear, total= 9.7s [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear .... [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear, total= 9.7s [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear .... [CV] C=135.76775824842434, gamma=0.838636245624803, kernel=linear, total= 9.4s [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf ...... [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf, total= 3.6min [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf ...... [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf, total= 2.8min [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf ...... [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf, total= 2.6min [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf ...... [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf, total= 3.2min [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf ...... [CV] C=151136.20282548846, gamma=1.4922453771381408, kernel=rbf, total= 3.2min [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear .... [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear, total= 9.8s [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear .... [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear, total= 9.9s [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear .... [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear, total= 10.0s [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear .... [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear, total= 9.9s [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear .... [CV] C=761.4316758498783, gamma=2.6126336514161914, kernel=linear, total= 9.9s [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear ... [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear, total= 52.9s [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear ... [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear, total= 51.1s [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear ... [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear, total= 1.3min [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear ... [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear, total= 54.1s [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear ... [CV] C=97392.81883041795, gamma=0.09265545895311562, kernel=linear, total= 44.0s [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear .... [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear, total= 11.2s [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear .... [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear, total= 11.2s [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear .... [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear, total= 10.8s [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear .... [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear, total= 11.3s [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear .... [CV] C=2423.0759984939164, gamma=3.248614270240346, kernel=linear, total= 10.4s [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear .... [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear, total= 10.0s [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear .... [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear, total= 9.8s [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear .... [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear, total= 10.1s [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear .... [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear, total= 10.2s [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear .... [CV] C=717.3632997255095, gamma=0.3165604432088257, kernel=linear, total= 9.8s [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf ....... [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf, total= 16.6s [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf ....... [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf, total= 16.6s [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf ....... [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf, total= 16.6s [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf ....... [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf, total= 16.6s [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf ....... [CV] C=4446.667521184072, gamma=3.3597284456608496, kernel=rbf, total= 16.6s [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear ... [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear, total= 11.0s [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear ... [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear, total= 11.5s [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear ... [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear, total= 11.7s [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear ... [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear, total= 11.0s [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear ... [CV] C=2963.564121207815, gamma=0.15189814782062885, kernel=linear, total= 10.9s [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear ... [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear, total= 9.4s [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear ... [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear, total= 9.6s [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear ... [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear, total= 9.7s [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear ... [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear, total= 9.7s [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear ... [CV] C=91.64267381686706, gamma=0.01575994483585621, kernel=linear, total= 9.4s [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf ..... [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf, total= 16.4s [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf ..... [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf, total= 16.4s [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf ..... [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf, total= 16.4s [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf ..... [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf, total= 16.3s [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf ..... [CV] C=24547.601975705915, gamma=0.22153944050588595, kernel=rbf, total= 16.3s [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf ...... [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf, total= 16.1s [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf ...... [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf, total= 16.2s [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf ...... [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf, total= 16.1s [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf ...... [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf, total= 16.1s [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf ...... [CV] C=22.76927941060928, gamma=0.22169760231351215, kernel=rbf, total= 16.1s [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear ... [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear, total= 16.4s [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear ... [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear, total= 17.7s [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear ... [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear, total= 17.8s [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear ... [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear, total= 18.2s [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear ... [CV] C=16483.850529752886, gamma=1.4752145260435134, kernel=linear, total= 15.6s [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf ....... [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf, total= 1.2min [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf ....... [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf, total= 1.2min [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf ....... [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf, total= 1.5min [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf ....... [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf, total= 1.6min [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf ....... [CV] C=101445.66881340064, gamma=1.052904084582266, kernel=rbf, total= 1.3min [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf ....... [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf, total= 31.2s [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf ....... [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf, total= 31.6s [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf ....... [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf, total= 31.0s [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf ....... [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf, total= 35.0s [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf ....... [CV] C=56681.80859029545, gamma=0.9763011917123741, kernel=rbf, total= 33.3s [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf ....... [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf, total= 15.7s [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf ....... [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf, total= 15.7s [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf ....... [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf, total= 15.7s [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf ....... [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf, total= 15.7s [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf ....... [CV] C=48.15822390928914, gamma=0.4633351167983427, kernel=rbf, total= 15.7s [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf ....... [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf, total= 15.3s [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf ....... [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf, total= 15.3s [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf ....... [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf, total= 15.3s [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf ....... [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf, total= 15.3s [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf ....... [CV] C=399.7268155705774, gamma=1.3078757839577408, kernel=rbf, total= 15.2s [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear ... [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear, total= 9.5s [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear ... [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear, total= 9.7s [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear ... [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear, total= 9.8s [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear ... [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear, total= 9.8s [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear ... [CV] C=251.14073886281363, gamma=0.8238105204914145, kernel=linear, total= 9.7s [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear .... [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear, total= 9.7s [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear .... [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear, total= 9.6s [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear .... [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear, total= 9.8s [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear .... [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear, total= 9.6s [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear .... [CV] C=60.17373642891687, gamma=1.2491263443165994, kernel=linear, total= 9.4s [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf ...... [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf, total= 15.9s [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf ...... [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf, total= 15.9s [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf ...... [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf, total= 16.0s [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf ...... [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf, total= 15.8s [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf ...... [CV] C=15415.161544891856, gamma=0.2691677514619319, kernel=rbf, total= 15.9s [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear .... [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear, total= 10.6s [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear .... [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear, total= 10.7s [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear .... [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear, total= 10.6s [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear .... [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear, total= 10.6s [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear .... [CV] C=1888.9148509967113, gamma=0.739678838777267, kernel=linear, total= 10.4s [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear ..... [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear, total= 9.6s [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear ..... [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear, total= 9.6s [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear ..... [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear, total= 9.6s [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear ..... [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear, total= 9.6s [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear ..... [CV] C=55.53838911232773, gamma=0.578634378499143, kernel=linear, total= 9.5s [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf ...... [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf, total= 15.4s [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf ...... [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf, total= 15.5s [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf ...... [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf, total= 15.5s [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf ...... [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf, total= 15.4s [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf ...... [CV] C=26.714480823948186, gamma=1.0117295509275495, kernel=rbf, total= 15.4s [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear ... [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear, total= 12.0s [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear ... [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear, total= 11.5s [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear ... [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear, total= 11.8s [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear ... [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear, total= 11.6s [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear ... [CV] C=3582.0552780489566, gamma=1.1891370222133257, kernel=linear, total= 11.1s [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear .... [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear, total= 9.5s [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear .... [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear, total= 9.5s [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear .... [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear, total= 9.8s [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear .... [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear, total= 9.8s [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear .... [CV] C=198.7004781812736, gamma=0.5282819748826726, kernel=linear, total= 9.5s [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear .... [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear, total= 9.8s [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear .... [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear, total= 9.5s [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear .... [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear, total= 9.7s [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear .... [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear, total= 9.6s [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear .... [CV] C=129.8000604143307, gamma=2.8621383676481322, kernel=linear, total= 9.5s [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf ...... [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf, total= 15.8s [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf ...... [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf, total= 15.8s [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf ...... [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf, total= 15.8s [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf ...... [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf, total= 15.8s [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf ...... [CV] C=288.4269299593897, gamma=0.17580835850006285, kernel=rbf, total= 15.8s [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear .... [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear, total= 12.6s [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear .... [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear, total= 12.5s [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear .... [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear, total= 13.1s [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear .... [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear, total= 12.8s [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear .... [CV] C=6287.039489427172, gamma=0.3504567255332862, kernel=linear, total= 12.1s [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf ....... [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf, total= 54.8s [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf ....... [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf, total= 1.0min [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf ....... [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf, total= 59.2s [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf ....... [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf, total= 1.0min [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf ....... [CV] C=61217.04421344494, gamma=1.6279689407405564, kernel=rbf, total= 56.6s [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf ........ [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf, total= 15.6s [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf ........ [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf, total= 15.5s [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf ........ [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf, total= 15.5s [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf ........ [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf, total= 15.5s [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf ........ [CV] C=926.9787684096649, gamma=2.147979593060577, kernel=rbf, total= 15.6s [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear ...... [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear, total= 26.2s [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear ...... [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear, total= 25.4s [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear ...... [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear, total= 23.4s [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear ...... [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear, total= 26.8s [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear ...... [CV] C=33946.157064934, gamma=2.2642426492862313, kernel=linear, total= 23.9s [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear .... [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear, total= 1.1min [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear .... [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear, total= 48.7s [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear .... [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear, total= 1.2min [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear .... [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear, total= 53.6s [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear .... [CV] C=84789.82947739525, gamma=0.3176359085304841, kernel=linear, total= 40.4s
[Parallel(n_jobs=1)]: Done 250 out of 250 | elapsed: 98.5min finished
RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                 epsilon=0.1, gamma='scale', kernel='rbf',
                                 max_iter=-1, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='deprecated', n_iter=50, n_jobs=None,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f1c24673050>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f1c24673a10>,
                                        'kernel': ['linear', 'rbf']},
                   pre_dispatch='2*n_jobs', random_state=42, refit=True,
                   return_train_score=False, scoring='neg_mean_squared_error',
                   verbose=2)
최상 모델의 (5-폴드 교차 검증으로 평가한) 점수는 다음과 같습니다:
negative_mse = rnd_search.best_score_
rmse = np.sqrt(-negative_mse)
rmse
54767.960710084146
이제 RandomForestRegressor의 성능에 훨씬 가까워졌습니다(하지만 아직 차이가 납니다). 최상의 하이퍼파라미터를 확인해 보겠습니다:
rnd_search.best_params_
{'C': 157055.10989448498, 'gamma': 0.26497040005002437, 'kernel': 'rbf'}
이번에는 RBF 커널에 대해 최적의 하이퍼파라미터 조합을 찾았습니다. 보통 랜덤서치가 같은 시간안에 그리드서치보다 더 좋은 하이퍼파라미터를 찾습니다.
여기서 사용된 scale=1.0인 지수 분포를 살펴보겠습니다. 일부 샘플은 1.0보다 아주 크거나 작습니다. 하지만 로그 분포를 보면 대부분의 값이 exp(-2)와 exp(+2), 즉 0.1과 7.4 사이에 집중되어 있음을 알 수 있습니다.
expon_distrib = expon(scale=1.)
samples = expon_distrib.rvs(10000, random_state=42)
plt.figure(figsize=(10, 4))
plt.subplot(121)
plt.title("Exponential distribution (scale=1.0)")
plt.hist(samples, bins=50)
plt.subplot(122)
plt.title("Log of this distribution")
plt.hist(np.log(samples), bins=50)
plt.show()
C에 사용된 분포는 매우 다릅니다. 주어진 범위안에서 균등 분포로 샘플링됩니다. 그래서 오른쪽 로그 분포가 거의 일정하게 나타납니다. 이런 분포는 원하는 축척(scale)이 정확이 무엇인지 모를 때 사용하면 좋습니다:
reciprocal_distrib = reciprocal(20, 200000)
samples = reciprocal_distrib.rvs(10000, random_state=42)
plt.figure(figsize=(10, 4))
plt.subplot(121)
plt.title("Reciprocal distribution (scale=1.0)")
plt.hist(samples, bins=50)
plt.subplot(122)
plt.title("Log of this distribution")
plt.hist(np.log(samples), bins=50)
plt.show()
reciprocal() 함수는 하이퍼파라미터의 축척에 대해 전혀 감을 잡을 수 없을 때 사용합니다(오른쪽 그래프에서 볼 수 있듯이 주어진 범위안에서 모든 값이 균등합니다). 반면 지수 분포는 하이퍼파라미터의 축척을 (어느정도) 알고 있을 때 사용하는 것이 좋습니다.
질문: 가장 중요한 특성을 선택하는 변환기를 준비 파이프라인에 추가해보세요.
from sklearn.base import BaseEstimator, TransformerMixin
def indices_of_top_k(arr, k):
    return np.sort(np.argpartition(np.array(arr), -k)[-k:])
class TopFeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, feature_importances, k):
        self.feature_importances = feature_importances
        self.k = k
    def fit(self, X, y=None):
        self.feature_indices_ = indices_of_top_k(self.feature_importances, self.k)
        return self
    def transform(self, X):
        return X[:, self.feature_indices_]
노트: 이 특성 선택 클래스는 이미 어떤 식으로든 특성 중요도를 계산했다고 가정합니다(가령 RandomForestRegressor을 사용하여). TopFeatureSelector의 fit() 메서드에서 직접 계산할 수도 있지만 (캐싱을 사용하지 않을 경우) 그리드서치나 랜덤서치의 모든 하이퍼파라미터 조합에 대해 계산이 일어나기 때문에 매우 느려집니다.
선택할 특성의 개수를 지정합니다:
k = 5
최상의 k개 특성의 인덱스를 확인해 보겠습니다:
top_k_feature_indices = indices_of_top_k(feature_importances, k)
top_k_feature_indices
array([ 0, 1, 7, 9, 12])
np.array(attributes)[top_k_feature_indices]
array(['longitude', 'latitude', 'median_income', 'pop_per_hhold',
       'INLAND'], dtype='<U18')
최상의 k개 특성이 맞는지 다시 확인합니다:
sorted(zip(feature_importances, attributes), reverse=True)[:k]
[(0.36615898061813423, 'median_income'), (0.16478099356159054, 'INLAND'), (0.10879295677551575, 'pop_per_hhold'), (0.07334423551601243, 'longitude'), (0.06290907048262032, 'latitude')]
좋습니다. 이제 이전에 정의한 준비 파이프라인과 특성 선택기를 추가한 새로운 파이프라인을 만듭니다:
preparation_and_feature_selection_pipeline = Pipeline([
    ('preparation', full_pipeline),
    ('feature_selection', TopFeatureSelector(feature_importances, k))
])
housing_prepared_top_k_features = preparation_and_feature_selection_pipeline.fit_transform(housing)
처음 3개 샘플의 특성을 확인해 보겠습니다:
housing_prepared_top_k_features[0:3]
array([[-1.15604281,  0.77194962, -0.61493744, -0.08649871,  0.        ],
       [-1.17602483,  0.6596948 ,  1.33645936, -0.03353391,  0.        ],
       [ 1.18684903, -1.34218285, -0.5320456 , -0.09240499,  0.        ]])
최상의 k개 특성이 맞는지 다시 확인합니다:
housing_prepared[0:3, top_k_feature_indices]
array([[-1.15604281,  0.77194962, -0.61493744, -0.08649871,  0.        ],
       [-1.17602483,  0.6596948 ,  1.33645936, -0.03353391,  0.        ],
       [ 1.18684903, -1.34218285, -0.5320456 , -0.09240499,  0.        ]])
성공입니다! :)
질문: 전체 데이터 준비 과정과 최종 예측을 하나의 파이프라인으로 만들어보세요.
prepare_select_and_predict_pipeline = Pipeline([
    ('preparation', full_pipeline),
    ('feature_selection', TopFeatureSelector(feature_importances, k)),
    ('svm_reg', SVR(**rnd_search.best_params_))
])
prepare_select_and_predict_pipeline.fit(housing, housing_labels)
Pipeline(memory=None,
         steps=[('preparation',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('imputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value=None,
                                                                                 missing_values=nan,
                                                                                 strategy='median',
                                                                                 verbose=0)),
                                                                  ('attribs_adder',
                                                                   CombinedAttributesAdder(add_...
       1.41064835e-02, 1.48742809e-02, 1.42575993e-02, 3.66158981e-01,
       5.64191792e-02, 1.08792957e-01, 5.33510773e-02, 1.03114883e-02,
       1.64780994e-01, 6.02803867e-05, 1.96041560e-03, 2.85647464e-03]),
                                    k=5)),
                ('svm_reg',
                 SVR(C=157055.10989448498, cache_size=200, coef0=0.0, degree=3,
                     epsilon=0.1, gamma=0.26497040005002437, kernel='rbf',
                     max_iter=-1, shrinking=True, tol=0.001, verbose=False))],
         verbose=False)
몇 개의 샘플에 전체 파이프라인을 적용해 보겠습니다:
some_data = housing.iloc[:4]
some_labels = housing_labels.iloc[:4]
print("Predictions:\t", prepare_select_and_predict_pipeline.predict(some_data))
print("Labels:\t\t", list(some_labels))
Predictions: [203214.28978849 371846.88152572 173295.65441612 47328.3970888 ] Labels: [286600.0, 340600.0, 196900.0, 46300.0]
전체 파이프라인이 잘 작동하는 것 같습니다. 물론 예측 성능이 아주 좋지는 않습니다. SVR보다 RandomForestRegressor가 더 나은 것 같습니다.
질문: GridSearchCV를 사용해 준비 단계의 옵션을 자동으로 탐색해보세요.
경고: 사용하는 하드웨어에 따라 다음 셀을 실행하는데 45분 또는 그 이상 걸릴 수 있습니다.
param_grid = [{
    'preparation__num__imputer__strategy': ['mean', 'median', 'most_frequent'],
    'feature_selection__k': list(range(1, len(feature_importances) + 1))
}]
grid_search_prep = GridSearchCV(prepare_select_and_predict_pipeline, param_grid, cv=5,
                                scoring='neg_mean_squared_error', verbose=2)
grid_search_prep.fit(housing, housing_labels)
Fitting 5 folds for each of 48 candidates, totalling 240 fits [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] feature_selection__k=1, preparation__num__imputer__strategy=mean, total= 12.1s [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 12.1s remaining: 0.0s
[CV] feature_selection__k=1, preparation__num__imputer__strategy=mean, total= 12.2s [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean, total= 12.2s [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean, total= 12.0s [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean [CV] feature_selection__k=1, preparation__num__imputer__strategy=mean, total= 12.0s [CV] feature_selection__k=1, preparation__num__imputer__strategy=median [CV] feature_selection__k=1, preparation__num__imputer__strategy=median, total= 12.0s [CV] feature_selection__k=1, preparation__num__imputer__strategy=median [CV] feature_selection__k=1, preparation__num__imputer__strategy=median, total= 12.2s [CV] feature_selection__k=1, preparation__num__imputer__strategy=median [CV] feature_selection__k=1, preparation__num__imputer__strategy=median, total= 12.1s [CV] feature_selection__k=1, preparation__num__imputer__strategy=median [CV] feature_selection__k=1, preparation__num__imputer__strategy=median, total= 12.0s [CV] feature_selection__k=1, preparation__num__imputer__strategy=median [CV] feature_selection__k=1, preparation__num__imputer__strategy=median, total= 12.1s [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent, total= 12.1s [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent, total= 12.2s [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent, total= 12.1s [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent, total= 12.0s [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=1, preparation__num__imputer__strategy=most_frequent, total= 12.1s [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean, total= 12.7s [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean, total= 12.6s [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean [CV] feature_selection__k=2, preparation__num__imputer__strategy=mean, total= 12.6s [CV] feature_selection__k=2, preparation__num__imputer__strategy=median [CV] feature_selection__k=2, preparation__num__imputer__strategy=median, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=median [CV] feature_selection__k=2, preparation__num__imputer__strategy=median, total= 12.7s [CV] feature_selection__k=2, preparation__num__imputer__strategy=median [CV] feature_selection__k=2, preparation__num__imputer__strategy=median, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=median [CV] feature_selection__k=2, preparation__num__imputer__strategy=median, total= 12.6s [CV] feature_selection__k=2, preparation__num__imputer__strategy=median [CV] feature_selection__k=2, preparation__num__imputer__strategy=median, total= 12.6s [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent, total= 12.7s [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent, total= 12.5s [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent, total= 12.6s [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=2, preparation__num__imputer__strategy=most_frequent, total= 12.6s [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean, total= 12.8s [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean [CV] feature_selection__k=3, preparation__num__imputer__strategy=mean, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=median [CV] feature_selection__k=3, preparation__num__imputer__strategy=median, total= 12.8s [CV] feature_selection__k=3, preparation__num__imputer__strategy=median [CV] feature_selection__k=3, preparation__num__imputer__strategy=median, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=median [CV] feature_selection__k=3, preparation__num__imputer__strategy=median, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=median [CV] feature_selection__k=3, preparation__num__imputer__strategy=median, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=median [CV] feature_selection__k=3, preparation__num__imputer__strategy=median, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent, total= 12.8s [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent, total= 12.7s [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=3, preparation__num__imputer__strategy=most_frequent, total= 12.7s [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean, total= 13.5s [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean [CV] feature_selection__k=4, preparation__num__imputer__strategy=mean, total= 13.6s [CV] feature_selection__k=4, preparation__num__imputer__strategy=median [CV] feature_selection__k=4, preparation__num__imputer__strategy=median, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=median [CV] feature_selection__k=4, preparation__num__imputer__strategy=median, total= 13.6s [CV] feature_selection__k=4, preparation__num__imputer__strategy=median [CV] feature_selection__k=4, preparation__num__imputer__strategy=median, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=median [CV] feature_selection__k=4, preparation__num__imputer__strategy=median, total= 14.0s [CV] feature_selection__k=4, preparation__num__imputer__strategy=median [CV] feature_selection__k=4, preparation__num__imputer__strategy=median, total= 13.6s [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent, total= 14.0s [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent, total= 13.6s [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent, total= 13.9s [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=4, preparation__num__imputer__strategy=most_frequent, total= 13.6s [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean, total= 14.2s [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean, total= 14.3s [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean, total= 14.3s [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean, total= 14.5s [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean [CV] feature_selection__k=5, preparation__num__imputer__strategy=mean, total= 14.2s [CV] feature_selection__k=5, preparation__num__imputer__strategy=median [CV] feature_selection__k=5, preparation__num__imputer__strategy=median, total= 14.2s [CV] feature_selection__k=5, preparation__num__imputer__strategy=median [CV] feature_selection__k=5, preparation__num__imputer__strategy=median, total= 14.4s [CV] feature_selection__k=5, preparation__num__imputer__strategy=median [CV] feature_selection__k=5, preparation__num__imputer__strategy=median, total= 14.3s [CV] feature_selection__k=5, preparation__num__imputer__strategy=median [CV] feature_selection__k=5, preparation__num__imputer__strategy=median, total= 14.6s [CV] feature_selection__k=5, preparation__num__imputer__strategy=median [CV] feature_selection__k=5, preparation__num__imputer__strategy=median, total= 14.1s [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent, total= 14.2s [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent, total= 14.3s [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent, total= 14.3s [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent, total= 14.5s [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=5, preparation__num__imputer__strategy=most_frequent, total= 14.1s [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean, total= 14.5s [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean, total= 14.9s [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean, total= 14.5s [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean, total= 14.3s [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean [CV] feature_selection__k=6, preparation__num__imputer__strategy=mean, total= 14.9s [CV] feature_selection__k=6, preparation__num__imputer__strategy=median [CV] feature_selection__k=6, preparation__num__imputer__strategy=median, total= 14.6s [CV] feature_selection__k=6, preparation__num__imputer__strategy=median [CV] feature_selection__k=6, preparation__num__imputer__strategy=median, total= 14.8s [CV] feature_selection__k=6, preparation__num__imputer__strategy=median [CV] feature_selection__k=6, preparation__num__imputer__strategy=median, total= 14.5s [CV] feature_selection__k=6, preparation__num__imputer__strategy=median [CV] feature_selection__k=6, preparation__num__imputer__strategy=median, total= 14.3s [CV] feature_selection__k=6, preparation__num__imputer__strategy=median [CV] feature_selection__k=6, preparation__num__imputer__strategy=median, total= 14.9s [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent, total= 14.5s [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent, total= 14.8s [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent, total= 14.5s [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent, total= 14.3s [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=6, preparation__num__imputer__strategy=most_frequent, total= 15.0s [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean, total= 15.6s [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean, total= 15.3s [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean, total= 16.0s [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean, total= 15.9s [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean [CV] feature_selection__k=7, preparation__num__imputer__strategy=mean, total= 15.4s [CV] feature_selection__k=7, preparation__num__imputer__strategy=median [CV] feature_selection__k=7, preparation__num__imputer__strategy=median, total= 16.9s [CV] feature_selection__k=7, preparation__num__imputer__strategy=median [CV] feature_selection__k=7, preparation__num__imputer__strategy=median, total= 15.8s [CV] feature_selection__k=7, preparation__num__imputer__strategy=median [CV] feature_selection__k=7, preparation__num__imputer__strategy=median, total= 15.8s [CV] feature_selection__k=7, preparation__num__imputer__strategy=median [CV] feature_selection__k=7, preparation__num__imputer__strategy=median, total= 15.2s [CV] feature_selection__k=7, preparation__num__imputer__strategy=median [CV] feature_selection__k=7, preparation__num__imputer__strategy=median, total= 15.4s [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent, total= 16.6s [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent, total= 15.6s [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent, total= 16.3s [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent, total= 15.2s [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=7, preparation__num__imputer__strategy=most_frequent, total= 16.4s [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean, total= 18.3s [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean, total= 17.8s [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean, total= 20.0s [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean, total= 19.2s [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean [CV] feature_selection__k=8, preparation__num__imputer__strategy=mean, total= 21.0s [CV] feature_selection__k=8, preparation__num__imputer__strategy=median [CV] feature_selection__k=8, preparation__num__imputer__strategy=median, total= 19.1s [CV] feature_selection__k=8, preparation__num__imputer__strategy=median [CV] feature_selection__k=8, preparation__num__imputer__strategy=median, total= 18.0s [CV] feature_selection__k=8, preparation__num__imputer__strategy=median [CV] feature_selection__k=8, preparation__num__imputer__strategy=median, total= 20.0s [CV] feature_selection__k=8, preparation__num__imputer__strategy=median [CV] feature_selection__k=8, preparation__num__imputer__strategy=median, total= 19.9s [CV] feature_selection__k=8, preparation__num__imputer__strategy=median [CV] feature_selection__k=8, preparation__num__imputer__strategy=median, total= 19.4s [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent, total= 18.7s [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent, total= 18.2s [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent, total= 19.7s [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent, total= 17.8s [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=8, preparation__num__imputer__strategy=most_frequent, total= 19.2s [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean, total= 26.5s [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean, total= 26.7s [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean, total= 25.0s [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean, total= 25.2s [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean [CV] feature_selection__k=9, preparation__num__imputer__strategy=mean, total= 23.1s [CV] feature_selection__k=9, preparation__num__imputer__strategy=median [CV] feature_selection__k=9, preparation__num__imputer__strategy=median, total= 26.1s [CV] feature_selection__k=9, preparation__num__imputer__strategy=median [CV] feature_selection__k=9, preparation__num__imputer__strategy=median, total= 26.5s [CV] feature_selection__k=9, preparation__num__imputer__strategy=median [CV] feature_selection__k=9, preparation__num__imputer__strategy=median, total= 22.3s [CV] feature_selection__k=9, preparation__num__imputer__strategy=median [CV] feature_selection__k=9, preparation__num__imputer__strategy=median, total= 25.4s [CV] feature_selection__k=9, preparation__num__imputer__strategy=median [CV] feature_selection__k=9, preparation__num__imputer__strategy=median, total= 22.9s [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent, total= 26.0s [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent, total= 26.5s [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent, total= 25.9s [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent, total= 25.5s [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=9, preparation__num__imputer__strategy=most_frequent, total= 24.5s [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean, total= 26.4s [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean, total= 28.0s [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean, total= 33.8s [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean, total= 30.0s [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean [CV] feature_selection__k=10, preparation__num__imputer__strategy=mean, total= 28.5s [CV] feature_selection__k=10, preparation__num__imputer__strategy=median [CV] feature_selection__k=10, preparation__num__imputer__strategy=median, total= 26.7s [CV] feature_selection__k=10, preparation__num__imputer__strategy=median [CV] feature_selection__k=10, preparation__num__imputer__strategy=median, total= 31.5s [CV] feature_selection__k=10, preparation__num__imputer__strategy=median [CV] feature_selection__k=10, preparation__num__imputer__strategy=median, total= 29.0s [CV] feature_selection__k=10, preparation__num__imputer__strategy=median [CV] feature_selection__k=10, preparation__num__imputer__strategy=median, total= 29.3s [CV] feature_selection__k=10, preparation__num__imputer__strategy=median [CV] feature_selection__k=10, preparation__num__imputer__strategy=median, total= 27.5s [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent, total= 27.2s [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent, total= 28.3s [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent, total= 27.1s [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent, total= 29.7s [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=10, preparation__num__imputer__strategy=most_frequent, total= 30.3s [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean, total= 35.3s [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean, total= 31.9s [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean, total= 32.4s [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean, total= 34.6s [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean [CV] feature_selection__k=11, preparation__num__imputer__strategy=mean, total= 33.3s [CV] feature_selection__k=11, preparation__num__imputer__strategy=median [CV] feature_selection__k=11, preparation__num__imputer__strategy=median, total= 29.7s [CV] feature_selection__k=11, preparation__num__imputer__strategy=median [CV] feature_selection__k=11, preparation__num__imputer__strategy=median, total= 29.1s [CV] feature_selection__k=11, preparation__num__imputer__strategy=median [CV] feature_selection__k=11, preparation__num__imputer__strategy=median, total= 29.8s [CV] feature_selection__k=11, preparation__num__imputer__strategy=median [CV] feature_selection__k=11, preparation__num__imputer__strategy=median, total= 35.1s [CV] feature_selection__k=11, preparation__num__imputer__strategy=median [CV] feature_selection__k=11, preparation__num__imputer__strategy=median, total= 33.9s [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent, total= 35.8s [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent, total= 28.8s [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent, total= 29.8s [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent, total= 32.5s [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=11, preparation__num__imputer__strategy=most_frequent, total= 38.6s [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean, total= 36.1s [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean, total= 35.2s [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean, total= 34.8s [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean, total= 35.4s [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean [CV] feature_selection__k=12, preparation__num__imputer__strategy=mean, total= 36.9s [CV] feature_selection__k=12, preparation__num__imputer__strategy=median [CV] feature_selection__k=12, preparation__num__imputer__strategy=median, total= 32.9s [CV] feature_selection__k=12, preparation__num__imputer__strategy=median [CV] feature_selection__k=12, preparation__num__imputer__strategy=median, total= 34.1s [CV] feature_selection__k=12, preparation__num__imputer__strategy=median [CV] feature_selection__k=12, preparation__num__imputer__strategy=median, total= 37.2s [CV] feature_selection__k=12, preparation__num__imputer__strategy=median [CV] feature_selection__k=12, preparation__num__imputer__strategy=median, total= 34.6s [CV] feature_selection__k=12, preparation__num__imputer__strategy=median [CV] feature_selection__k=12, preparation__num__imputer__strategy=median, total= 34.3s [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent, total= 32.0s [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent, total= 33.9s [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent, total= 40.5s [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent, total= 32.8s [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=12, preparation__num__imputer__strategy=most_frequent, total= 33.3s [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean, total= 43.1s [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean, total= 39.2s [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean, total= 41.8s [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean, total= 38.3s [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean [CV] feature_selection__k=13, preparation__num__imputer__strategy=mean, total= 32.2s [CV] feature_selection__k=13, preparation__num__imputer__strategy=median [CV] feature_selection__k=13, preparation__num__imputer__strategy=median, total= 34.9s [CV] feature_selection__k=13, preparation__num__imputer__strategy=median [CV] feature_selection__k=13, preparation__num__imputer__strategy=median, total= 41.3s [CV] feature_selection__k=13, preparation__num__imputer__strategy=median [CV] feature_selection__k=13, preparation__num__imputer__strategy=median, total= 43.2s [CV] feature_selection__k=13, preparation__num__imputer__strategy=median [CV] feature_selection__k=13, preparation__num__imputer__strategy=median, total= 42.1s [CV] feature_selection__k=13, preparation__num__imputer__strategy=median [CV] feature_selection__k=13, preparation__num__imputer__strategy=median, total= 38.0s [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent, total= 35.4s [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent, total= 41.2s [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent, total= 42.5s [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent, total= 41.5s [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=13, preparation__num__imputer__strategy=most_frequent, total= 37.2s [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean, total= 34.6s [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean, total= 40.4s [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean, total= 40.6s [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean, total= 40.8s [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean [CV] feature_selection__k=14, preparation__num__imputer__strategy=mean, total= 37.6s [CV] feature_selection__k=14, preparation__num__imputer__strategy=median [CV] feature_selection__k=14, preparation__num__imputer__strategy=median, total= 39.9s [CV] feature_selection__k=14, preparation__num__imputer__strategy=median [CV] feature_selection__k=14, preparation__num__imputer__strategy=median, total= 41.6s [CV] feature_selection__k=14, preparation__num__imputer__strategy=median [CV] feature_selection__k=14, preparation__num__imputer__strategy=median, total= 40.6s [CV] feature_selection__k=14, preparation__num__imputer__strategy=median [CV] feature_selection__k=14, preparation__num__imputer__strategy=median, total= 40.5s [CV] feature_selection__k=14, preparation__num__imputer__strategy=median [CV] feature_selection__k=14, preparation__num__imputer__strategy=median, total= 36.7s [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent, total= 40.3s [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent, total= 35.5s [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent, total= 37.4s [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent, total= 41.7s [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=14, preparation__num__imputer__strategy=most_frequent, total= 48.7s [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean, total= 42.0s [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean, total= 39.7s [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean, total= 42.7s [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean, total= 33.6s [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean [CV] feature_selection__k=15, preparation__num__imputer__strategy=mean, total= 39.2s [CV] feature_selection__k=15, preparation__num__imputer__strategy=median [CV] feature_selection__k=15, preparation__num__imputer__strategy=median, total= 35.7s [CV] feature_selection__k=15, preparation__num__imputer__strategy=median [CV] feature_selection__k=15, preparation__num__imputer__strategy=median, total= 39.9s [CV] feature_selection__k=15, preparation__num__imputer__strategy=median [CV] feature_selection__k=15, preparation__num__imputer__strategy=median, total= 43.8s [CV] feature_selection__k=15, preparation__num__imputer__strategy=median [CV] feature_selection__k=15, preparation__num__imputer__strategy=median, total= 43.8s [CV] feature_selection__k=15, preparation__num__imputer__strategy=median [CV] feature_selection__k=15, preparation__num__imputer__strategy=median, total= 40.6s [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent, total= 43.5s [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent, total= 43.2s [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent, total= 44.3s [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent, total= 36.1s [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=15, preparation__num__imputer__strategy=most_frequent, total= 44.3s [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean, total= 41.2s [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean, total= 42.2s [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean, total= 39.4s [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean, total= 42.5s [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean [CV] feature_selection__k=16, preparation__num__imputer__strategy=mean, total= 36.2s [CV] feature_selection__k=16, preparation__num__imputer__strategy=median [CV] feature_selection__k=16, preparation__num__imputer__strategy=median, total= 37.7s [CV] feature_selection__k=16, preparation__num__imputer__strategy=median [CV] feature_selection__k=16, preparation__num__imputer__strategy=median, total= 41.7s [CV] feature_selection__k=16, preparation__num__imputer__strategy=median [CV] feature_selection__k=16, preparation__num__imputer__strategy=median, total= 39.9s [CV] feature_selection__k=16, preparation__num__imputer__strategy=median [CV] feature_selection__k=16, preparation__num__imputer__strategy=median, total= 34.3s [CV] feature_selection__k=16, preparation__num__imputer__strategy=median [CV] feature_selection__k=16, preparation__num__imputer__strategy=median, total= 40.7s [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent, total= 36.9s [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent, total= 41.7s [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent, total= 39.2s [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent, total= 39.4s [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent [CV] feature_selection__k=16, preparation__num__imputer__strategy=most_frequent, total= 42.3s
[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed: 99.1min finished
GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('preparation',
                                        ColumnTransformer(n_jobs=None,
                                                          remainder='drop',
                                                          sparse_threshold=0.3,
                                                          transformer_weights=None,
                                                          transformers=[('num',
                                                                         Pipeline(memory=None,
                                                                                  steps=[('imputer',
                                                                                          SimpleImputer(add_indicator=False,
                                                                                                        copy=True,
                                                                                                        fill_value=None,
                                                                                                        missing_values=nan,
                                                                                                        strategy='median',
                                                                                                        verbose=0)),
                                                                                         (...
                                            kernel='rbf', max_iter=-1,
                                            shrinking=True, tol=0.001,
                                            verbose=False))],
                                verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'feature_selection__k': [1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                   10, 11, 12, 13, 14, 15, 16],
                          'preparation__num__imputer__strategy': ['mean',
                                                                  'median',
                                                                  'most_frequent']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)
grid_search_prep.best_params_
{'feature_selection__k': 15,
 'preparation__num__imputer__strategy': 'most_frequent'}
최상의 Imputer 정책은 most_frequent이고 거의 모든 특성이 유용합니다(16개 중 15개). 마지막 특성(ISLAND)은 잡음이 추가될 뿐입니다.