# Require Python >= 3.5.
# NOTE(review): f-strings are used further down in this file, which need Python 3.6+.
import sys
assert sys.version_info >= (3, 5)

# Require Scikit-Learn >= 0.20.
# The version is compared numerically (major, minor): comparing the raw strings
# is lexicographic, so e.g. "0.9" >= "0.20" would wrongly pass.
import re
import sklearn
assert tuple(int(part) for part in re.findall(r"\d+", sklearn.__version__)[:2]) >= (0, 20)


%matplotlib inline 

import matplotlib as mpl

# Default font sizes: 14 for axis labels, 12 for the x/y tick labels.
mpl.rcParams["axes.labelsize"] = 14
mpl.rcParams["xtick.labelsize"] = 12
mpl.rcParams["ytick.labelsize"] = 12


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


import os

# Directory in which figures are saved: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the directory right away; exist_ok avoids an error when it is already there
os.makedirs(IMAGES_PATH, exist_ok=True)

# Helper for saving figures
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current pyplot figure as ``<fig_id>.<fig_extension>`` in IMAGES_PATH.

    ``tight_layout`` controls whether ``plt.tight_layout()`` is applied before
    saving; ``resolution`` is passed to ``plt.savefig`` as the dpi.
    """
    target = os.path.join(IMAGES_PATH, "".join((fig_id, ".", fig_extension)))
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)


import numpy as np

# Seed NumPy's global random generator with a fixed value
np.random.seed(42)


# Base URL of the CSV files loaded below
datapath = "https://raw.githubusercontent.com/codingalzi/handson-ml2/master/notebooks/datasets/lifesat/"


# GDP-per-capita table: tab-separated, latin1-encoded, "," as the thousands
# separator and "n/a" treated as a missing value.
gdp_per_capita = pd.read_csv(
    datapath + "gdp_per_capita.csv",
    sep="\t",
    encoding="latin1",
    thousands=",",
    na_values="n/a",
)


gdp_per_capita


# Use the country name as the row index.
gdp_per_capita = gdp_per_capita.set_index("Country")

gdp_per_capita.head()


# Give the "2015" column a descriptive name.
gdp_per_capita = gdp_per_capita.rename(columns={"2015": "GDP per capita"})

gdp_per_capita.head()


# OECD Better Life Index table; "," is parsed as the thousands separator
oecd_bli = pd.read_csv(datapath + "oecd_bli_2015.csv", thousands=',')


oecd_bli.shape

(3292, 17)


oecd_bli.head()


oecd_bli.Indicator.unique()

array(['Dwellings without basic facilities', 'Housing expenditure',
       'Rooms per person', 'Household net adjusted disposable income',
       'Household net financial wealth', 'Employment rate',
       'Job security', 'Long-term unemployment rate', 'Personal earnings',
       'Quality of support network', 'Educational attainment',
       'Student skills', 'Years in education', 'Air pollution',
       'Water quality', 'Consultation on rule-making', 'Voter turnout',
       'Life expectancy', 'Self-reported health', 'Life satisfaction',
       'Assault rate', 'Homicide rate',
       'Employees working very long hours',
       'Time devoted to leisure and personal care'], dtype=object)


oecd_bli.Indicator.unique().shape

(24,)


'Life satisfaction' in oecd_bli.Indicator.unique()

True


# Boolean mask selecting the rows whose Indicator is 'Life satisfaction'
mask = oecd_bli.Indicator == 'Life satisfaction'
oecd_bli[mask].shape

(179, 17)


oecd_bli.INEQUALITY.unique()

array(['TOT', 'MN', 'WMN', 'HGH', 'LW'], dtype=object)


arr1 = oecd_bli.INEQUALITY.unique()

# Print the number of rows for each INEQUALITY code, then the grand total.
# The accumulator is not called `sum`: that name would shadow the builtin
# for every later cell of the session.
total_rows = 0
for ineq in arr1:
    num_lines = (oecd_bli['INEQUALITY'] == ineq).sum()
    total_rows += num_lines
    print(f"{ineq:>3}:\t{num_lines}행")

print(f"\n 총:\t{total_rows}행")

TOT:	888행
 MN:	881행
WMN:	881행
HGH:	328행
 LW:	314행

 총:	3292행


# Keep only the rows whose INEQUALITY code is "TOT"
oecd_bli = oecd_bli[oecd_bli["INEQUALITY"]=="TOT"]


oecd_bli.shape

(888, 17)


# Reshape to one row per country and one column per indicator, filled from "Value"
oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")


oecd_bli


oecd_bli.loc['Korea']

Indicator
Air pollution                                   30.00
Assault rate                                     2.10
Consultation on rule-making                     10.40
Dwellings without basic facilities               4.20
Educational attainment                          82.00
Employees working very long hours               18.72
Employment rate                                 64.00
Homicide rate                                    1.10
Household net adjusted disposable income     19510.00
Household net financial wealth               29091.00
Housing expenditure                             16.00
Job security                                     3.20
Life expectancy                                 81.30
Life satisfaction                                5.80
Long-term unemployment rate                      0.01
Personal earnings                            36354.00
Quality of support network                      72.00
Rooms per person                                 1.40
Self-reported health                            35.00
Student skills                                 542.00
Time devoted to leisure and personal care       14.63
Voter turnout                                   76.00
Water quality                                   78.00
Years in education                              17.50
Name: Korea, dtype: float64


oecd_bli["Life satisfaction"].head()

Country
Australia    7.3
Austria      6.9
Belgium      6.9
Brazil       7.0
Canada       7.3
Name: Life satisfaction, dtype: float64


# Join GDP per capita and life satisfaction on their shared country index.
oecd_country_stats = pd.merge(
    gdp_per_capita["GDP per capita"],
    oecd_bli["Life satisfaction"],
    left_index=True,
    right_index=True,
)


# Order the rows by ascending GDP per capita.
oecd_country_stats = oecd_country_stats.sort_values(by="GDP per capita")

oecd_country_stats


# Row positions (in the GDP-sorted table) that are held out of the sample.
remove_indices = [0, 1, 6, 8, 33, 34, 35]

# Every other position, in ascending order. Sorting makes the row order explicit
# instead of relying on the iteration order of a set.
# NOTE(review): 36 assumes the merged table has exactly 36 rows -- confirm if the data changes.
keep_indices = sorted(set(range(36)) - set(remove_indices))


# Rows at the positions listed in remove_indices (left out of the sample)
missing_data = oecd_country_stats.iloc[remove_indices]

missing_data


# Rows at the remaining positions; used for the plots and model fitting below
sample_data = oecd_country_stats.iloc[keep_indices]


# Scatter plot of the sample data (the 7 held-out countries are excluded)
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(5,3))
plt.axis([0, 60000, 0, 10])

# Where to place the name label of each of the 5 highlighted countries
position_text = {
    "Hungary": (5000, 1),
    "Korea": (18000, 1.7),
    "France": (29000, 2.4),
    "Australia": (40000, 3.0),
    "United States": (52000, 3.8),
}

# Mark the 5 countries with red dots and labelled arrows
for country, pos_text in position_text.items():
    # The row's two values are (GDP per capita, Life satisfaction)
    pos_data_x, pos_data_y = sample_data.loc[country]
    
    # Label the country; "United States" is shortened to "U.S."
    country = "U.S." if country == "United States" else country
    plt.annotate(country, xy=(pos_data_x, pos_data_y), xytext=pos_text,
            arrowprops=dict(facecolor='black', width=0.5, shrink=0.1, headwidth=5))
    
    # Draw the country's data point in red
    plt.plot(pos_data_x, pos_data_y, "ro")

plt.xlabel("GDP per capita (USD)")

plt.show()


sample_data.loc[list(position_text.keys())]


import numpy as np

# Scatter plot of the sample data with three hand-picked candidate lines
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(5,3))
plt.xlabel("GDP per capita (USD)")
plt.axis([0, 60000, 0, 10])

# Draw the lines
X=np.linspace(0, 60000, 1000)

# Red line: y = 0 + 2e-5 * x
plt.plot(X, 2*X/100000, "r")
plt.text(40000, 2.7, r"$\theta_0 = 0$", fontsize=14, color="r")
plt.text(40000, 1.8, r"$\theta_1 = 2 \times 10^{-5}$", fontsize=14, color="r")

# Green line: y = 8 - 5e-5 * x
plt.plot(X, 8 - 5*X/100000, "g")
plt.text(5000, 9.1, r"$\theta_0 = 8$", fontsize=14, color="g")
plt.text(5000, 8.2, r"$\theta_1 = -5 \times 10^{-5}$", fontsize=14, color="g")

# Blue line: y = 4 + 5e-5 * x
plt.plot(X, 4 + 5*X/100000, "b")
plt.text(5000, 3.5, r"$\theta_0 = 4$", fontsize=14, color="b")
plt.text(5000, 2.6, r"$\theta_1 = 5 \times 10^{-5}$", fontsize=14, color="b")

plt.show()


from sklearn import linear_model

# Linear regression model, not yet fitted
lin1 = linear_model.LinearRegression()


# np.c_ turns each column into a 2-D array with a single column
Xsample = np.c_[sample_data["GDP per capita"]]
ysample = np.c_[sample_data["Life satisfaction"]]


Xsample[:5]

array([[ 9054.914],
       [ 9437.372],
       [12239.894],
       [12495.334],
       [15991.736]])


ysample[:5]

array([[6. ],
       [5.6],
       [4.9],
       [5.8],
       [6.1]])


# Fit the linear model on the sample data
lin1.fit(Xsample, ysample)

LinearRegression()


# Pull the fitted intercept and slope out of their arrays as scalars
t0, t1 = lin1.intercept_[0], lin1.coef_[0][0]

# Print the values only; the original f-strings appended a stray literal "t1"
# after each number.
print(f"절편:\t {t0}")
print(f"기울기:\t {t1}")

절편:	 4.853052800266436t1
기울기:	 4.911544589158483e-05t1


# Scatter plot of the sample data
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(5,3))
plt.xlabel("GDP per capita (USD)")
plt.axis([0, 60000, 0, 10])

# Draw the fitted line t0 + t1 * x
X=np.linspace(0, 60000, 1000)
plt.plot(X, t0 + t1*X, "b")
# Show the line's intercept and slope (the label text is hard-coded, not read from t0/t1)
plt.text(5000, 3.1, r"$\theta_0 = 4.85$", fontsize=14, color="b")
plt.text(5000, 2.2, r"$\theta_1 = 4.91 \times 10^{-5}$", fontsize=14, color="b")

plt.show()


# Look up Cyprus in the full GDP table
cyprus_gdp_per_capita = gdp_per_capita.loc["Cyprus"]["GDP per capita"]

cyprus_gdp_per_capita

22587.49


# Predict for a single sample; [0, 0] unwraps the one value from the 2-D result
cyprus_predicted_life_satisfaction = lin1.predict([[cyprus_gdp_per_capita]])[0, 0]

cyprus_predicted_life_satisfaction

5.96244744318815


# Scatter plot of the sample data, drawn with very small markers (s=1)
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(5,3), s=1)
plt.xlabel("GDP per capita (USD)")

# Fitted line
X=np.linspace(0, 60000, 1000)
plt.plot(X, t0 + t1*X, "b")
plt.axis([0, 60000, 0, 10])
plt.text(5000, 7.5, r"$\theta_0 = 4.85$", fontsize=14, color="b")
plt.text(5000, 6.6, r"$\theta_1 = 4.91 \times 10^{-5}$", fontsize=14, color="b")

# Predicted life satisfaction for Cyprus

# Red dashed vertical line from the x-axis up to the predicted value
plt.plot([cyprus_gdp_per_capita, cyprus_gdp_per_capita], [0, cyprus_predicted_life_satisfaction], "r--")
plt.text(25000, 5.0, r"Prediction = 5.96", fontsize=14, color="b")

# Mark the predicted point (red dot)
plt.plot(cyprus_gdp_per_capita, cyprus_predicted_life_satisfaction, "ro")

plt.show()


missing_data


# Where to place the name label of each of the 7 held-out countries
position_text2 = {
    "Brazil": (1000, 9.0),
    "Mexico": (11000, 9.0),
    "Chile": (25000, 9.0),
    "Czech Republic": (35000, 9.0),
    "Norway": (60000, 3),
    "Switzerland": (72000, 3.0),
    "Luxembourg": (90000, 3.0),
}


# Scatter plot of the sample data, i.e. without the 7 held-out countries
sample_data.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(8,3))
plt.axis([0, 110000, 0, 10])

# The 7 held-out countries, drawn as red squares
for country, pos_text in position_text2.items():
    pos_data_x, pos_data_y = missing_data.loc[country]
    # Label the country
    plt.annotate(country, xy=(pos_data_x, pos_data_y), xytext=pos_text,
            arrowprops=dict(facecolor='black', width=0.5, shrink=0.1, headwidth=5))
    plt.plot(pos_data_x, pos_data_y, "rs")

# Line fitted without the 7 countries (blue dotted)
X=np.linspace(0, 110000, 1000)
plt.plot(X, t0 + t1*X, "b:")

# Fit a second linear regression on all rows, including the 7 countries
lin_reg_full = linear_model.LinearRegression()
Xfull = np.c_[oecd_country_stats["GDP per capita"]]
yfull = np.c_[oecd_country_stats["Life satisfaction"]]
lin_reg_full.fit(Xfull, yfull)

# Line fitted on all rows (black solid)
t0full, t1full = lin_reg_full.intercept_[0], lin_reg_full.coef_[0][0]
X = np.linspace(0, 110000, 1000)
plt.plot(X, t0full + t1full * X, "k")
plt.xlabel("GDP per capita (USD)")

plt.show()


# Scatter plot of all rows, including the 7 previously held-out countries
oecd_country_stats.plot(kind='scatter', x="GDP per capita", y='Life satisfaction', figsize=(8,3))
plt.axis([0, 110000, 0, 10])

from sklearn import preprocessing
from sklearn import pipeline

# Degree-60 polynomial features -> standardisation -> linear regression
poly = preprocessing.PolynomialFeatures(degree=60, include_bias=False)
scaler = preprocessing.StandardScaler()
lin_reg2 = linear_model.LinearRegression()

pipeline_reg = pipeline.Pipeline([('poly', poly), ('scal', scaler), ('lin', lin_reg2)])
pipeline_reg.fit(Xfull, yfull)
# X is the 1-D linspace assigned in an earlier cell; np.newaxis reshapes it into a column
curve = pipeline_reg.predict(X[:, np.newaxis])
plt.plot(X, curve)
plt.xlabel("GDP per capita (USD)")
save_fig('overfitting_model_plot')
plt.show()

/Users/gslee/opt/anaconda3/lib/python3.8/site-packages/numpy/lib/nanfunctions.py:1544: RuntimeWarning: overflow encountered in multiply
  sqr = np.multiply(arr, arr, out=arr)
/Users/gslee/opt/anaconda3/lib/python3.8/site-packages/numpy/core/fromnumeric.py:87: RuntimeWarning: overflow encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)

Saving figure overfitting_model_plot


oecd_country_stats.loc[[c for c in oecd_country_stats.index if "W" in c.upper()]]["Life satisfaction"]

Country
New Zealand    7.3
Sweden         7.2
Norway         7.4
Switzerland    7.5
Name: Life satisfaction, dtype: float64


gdp_per_capita.loc[[c for c in gdp_per_capita.index if "W" in c.upper()]].head()


plt.figure(figsize=(8,3))

# NOTE: this x label is replaced by the plt.xlabel call near the end of the cell
plt.xlabel("GDP per capita")
plt.ylabel('Life satisfaction')

# Sample data without the 7 countries (blue circles)
plt.plot(list(sample_data["GDP per capita"]), list(sample_data["Life satisfaction"]), "bo")
# The 7 held-out countries (red squares)
plt.plot(list(missing_data["GDP per capita"]), list(missing_data["Life satisfaction"]), "rs")

# Linear model fitted on all rows (red dashed)
X = np.linspace(0, 110000, 1000)
plt.plot(X, t0full + t1full * X, "r--", label="Linear model on all data")
# Linear model fitted without the 7 countries (blue dotted)
plt.plot(X, t0 + t1*X, "b:", label="Linear model on partial data")

# Ridge-regularised model, fitted on the data without the 7 countries
ridge = linear_model.Ridge(alpha=10**9.5)
Xsample = np.c_[sample_data["GDP per capita"]]
ysample = np.c_[sample_data["Life satisfaction"]]
ridge.fit(Xsample, ysample)
# Line of the ridge model (blue solid)
t0ridge, t1ridge = ridge.intercept_[0], ridge.coef_[0][0]
plt.plot(X, t0ridge + t1ridge * X, "b", label="Regularized linear model on partial data")

plt.legend(loc="lower right")
plt.axis([0, 110000, 0, 10])
plt.xlabel("GDP per capita (USD)")
save_fig('ridge_model_plot')
plt.show()

Saving figure ridge_model_plot


sample_data[7:10]


(5.1 + 5.7 + 6.5)/3

5.766666666666667


import sklearn.neighbors
# k-nearest-neighbours regressor using 3 neighbours
model = sklearn.neighbors.KNeighborsRegressor(n_neighbors=3)


# Inputs and targets as 2-D single-column arrays
X = np.c_[sample_data["GDP per capita"]]
y = np.c_[sample_data["Life satisfaction"]]

# Train the model
model.fit(X, y)

# Predict for Cyprus
X_new = np.array([[22587.0]])  # Cyprus' GDP per capita
print(model.predict(X_new)) # outputs [[ 5.76666667]]

[[5.76666667]]

	Country	Subject Descriptor	Units	Scale	Country/Series-specific Notes	2015	Estimates Start After
0	Afghanistan	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	599.994	2013.0
1	Albania	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	3995.383	2010.0
2	Algeria	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4318.135	2014.0
3	Angola	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4100.315	2014.0
4	Antigua and Barbuda	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	14414.302	2011.0
...	...	...	...	...	...	...	...
185	Vietnam	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	2088.344	2012.0
186	Yemen	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	1302.940	2008.0
187	Zambia	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	1350.151	2010.0
188	Zimbabwe	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	1064.350	2012.0
189	International Monetary Fund, World Economic Ou...	NaN	NaN	NaN	NaN	NaN	NaN

	Subject Descriptor	Units	Scale	Country/Series-specific Notes	2015	Estimates Start After
Country
Afghanistan	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	599.994	2013.0
Albania	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	3995.383	2010.0
Algeria	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4318.135	2014.0
Angola	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4100.315	2014.0
Antigua and Barbuda	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	14414.302	2011.0

	Subject Descriptor	Units	Scale	Country/Series-specific Notes	GDP per capita	Estimates Start After
Country
Afghanistan	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	599.994	2013.0
Albania	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	3995.383	2010.0
Algeria	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4318.135	2014.0
Angola	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	4100.315	2014.0
Antigua and Barbuda	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	14414.302	2011.0

	LOCATION	Country	INDICATOR	Indicator	MEASURE	Measure	INEQUALITY	Inequality	Unit Code	Unit	PowerCode	Reference Period Code	Reference Period	Value	Flag Codes	Flags
0	AUS	Australia	HO_BASE	Dwellings without basic facilities	L	Value	TOT	Total	PC	Percentage	units	NaN	NaN	1.1	E	Estimated value
1	AUT	Austria	HO_BASE	Dwellings without basic facilities	L	Value	TOT	Total	PC	Percentage	units	NaN	NaN	1.0	NaN	NaN
2	BEL	Belgium	HO_BASE	Dwellings without basic facilities	L	Value	TOT	Total	PC	Percentage	units	NaN	NaN	2.0	NaN	NaN
3	CAN	Canada	HO_BASE	Dwellings without basic facilities	L	Value	TOT	Total	PC	Percentage	units	NaN	NaN	0.2	NaN	NaN
4	CZE	Czech Republic	HO_BASE	Dwellings without basic facilities	L	Value	TOT	Total	PC	Percentage	units	NaN	NaN	0.9	NaN	NaN

기준	기호	대상
Total	TOT	전체 인구
Men	MN	남성
Women	WMN	여성
High	HGH	상위 소득
Low	LW	하위 소득

1장 한눈에 보는 머신러닝¶

주요 내용¶

기본 설정¶

모델 기반 학습¶

✋ 그래프 저장 함수 선언¶

무작위성 시드 지정¶

1단계: 문제 정의¶

2단계: 데이터 구하기¶

3단계: 데이터 적재, 정제, 전처리¶

1인당 GDP 데이터 적재, 정제, 전처리¶

삶의 만족도 데이터 적재, 정제, 전처리¶

데이터 병합¶

4단계: 모델 선택과 훈련¶

선형 관계 확인¶

선형회귀 모델¶

사이킷런 라이브러리 활용 선형회귀 모델 훈련¶

5단계: 학습된 모델 활용¶

머신러닝의 주요 도전 과제¶

대표성 없는 훈련 데이터¶

과대적합과 모델 규제¶

과대적합¶

모델 규제¶

사례 기반 학습¶

과제¶

Indicator	Air pollution	Assault rate	Consultation on rule-making	Dwellings without basic facilities	Educational attainment	Employees working very long hours	Employment rate	Homicide rate	Household net adjusted disposable income	Household net financial wealth	...	Long-term unemployment rate	Personal earnings	Quality of support network	Rooms per person	Self-reported health	Student skills	Time devoted to leisure and personal care	Voter turnout	Water quality	Years in education
Country
Australia	13.0	2.1	10.5	1.1	76.0	14.02	72.0	0.8	31588.0	47657.0	...	1.08	50449.0	92.0	2.3	85.0	512.0	14.41	93.0	91.0	19.4
Austria	27.0	3.4	7.1	1.0	83.0	7.61	72.0	0.4	31173.0	49887.0	...	1.19	45199.0	89.0	1.6	69.0	500.0	14.46	75.0	94.0	17.0
Belgium	21.0	6.6	4.5	2.0	72.0	4.57	62.0	1.1	28307.0	83876.0	...	3.88	48082.0	94.0	2.2	74.0	509.0	15.71	89.0	87.0	18.9
Brazil	18.0	7.9	4.0	6.7	45.0	10.41	67.0	25.5	11664.0	6844.0	...	1.97	17177.0	90.0	1.6	69.0	402.0	14.97	79.0	72.0	16.3
Canada	15.0	1.3	10.5	0.2	89.0	3.94	72.0	1.5	29365.0	67913.0	...	0.90	46911.0	92.0	2.5	89.0	522.0	14.25	61.0	91.0	17.2
Chile	46.0	6.9	2.0	9.4	57.0	15.42	62.0	4.4	14533.0	17733.0	...	1.59	22101.0	86.0	1.2	59.0	436.0	14.41	49.0	73.0	16.5
Czech Republic	16.0	2.8	6.8	0.9	92.0	6.98	68.0	0.8	18404.0	17299.0	...	3.12	20338.0	85.0	1.4	60.0	500.0	14.98	59.0	85.0	18.1
Denmark	15.0	3.9	7.0	0.9	78.0	2.03	73.0	0.3	26491.0	44488.0	...	1.78	48347.0	95.0	1.9	72.0	498.0	16.06	88.0	94.0	19.4
Estonia	9.0	5.5	3.3	8.1	90.0	3.30	68.0	4.8	15167.0	7680.0	...	3.82	18944.0	89.0	1.5	54.0	526.0	14.90	64.0	79.0	17.5
Finland	15.0	2.4	9.0	0.6	85.0	3.58	69.0	1.4	27927.0	18761.0	...	1.73	40060.0	95.0	1.9	65.0	529.0	14.89	69.0	94.0	19.7
France	12.0	5.0	3.5	0.5	73.0	8.15	64.0	0.6	28799.0	48741.0	...	3.99	40242.0	87.0	1.8	67.0	500.0	15.33	80.0	82.0	16.4
Germany	16.0	3.6	4.5	0.1	86.0	5.25	73.0	0.5	31252.0	50394.0	...	2.37	43682.0	94.0	1.8	65.0	515.0	15.31	72.0	95.0	18.2
Greece	27.0	3.7	6.5	0.7	68.0	6.16	49.0	1.6	18575.0	14579.0	...	18.39	25503.0	83.0	1.2	74.0	466.0	14.91	64.0	69.0	18.6
Hungary	15.0	3.6	7.9	4.8	82.0	3.19	58.0	1.3	15442.0	13277.0	...	5.10	20948.0	87.0	1.1	57.0	487.0	15.04	62.0	77.0	17.6
Iceland	18.0	2.7	5.1	0.4	71.0	12.25	82.0	0.3	23965.0	43045.0	...	1.18	55716.0	96.0	1.5	77.0	484.0	14.61	81.0	97.0	19.8
Ireland	13.0	2.6	9.0	0.2	75.0	4.20	60.0	0.8	23917.0	31580.0	...	8.39	49506.0	96.0	2.1	82.0	516.0	15.19	70.0	80.0	17.6
Israel	21.0	6.4	2.5	3.7	85.0	16.03	67.0	2.3	22104.0	52933.0	...	0.79	28817.0	87.0	1.2	80.0	474.0	14.48	68.0	68.0	15.8
Italy	21.0	4.7	5.0	1.1	57.0	3.66	56.0	0.7	25166.0	54987.0	...	6.94	34561.0	90.0	1.4	66.0	490.0	14.98	75.0	71.0	16.8
Japan	24.0	1.4	7.3	6.4	94.0	22.26	72.0	0.3	26111.0	86764.0	...	1.67	35405.0	89.0	1.8	30.0	540.0	14.93	53.0	85.0	16.3
Korea	30.0	2.1	10.4	4.2	82.0	18.72	64.0	1.1	19510.0	29091.0	...	0.01	36354.0	72.0	1.4	35.0	542.0	14.63	76.0	78.0	17.5
Luxembourg	12.0	4.3	6.0	0.1	78.0	3.47	66.0	0.4	38951.0	61765.0	...	1.78	56021.0	87.0	2.0	72.0	490.0	15.12	91.0	86.0	15.1
Mexico	30.0	12.8	9.0	4.2	37.0	28.83	61.0	23.4	13085.0	9056.0	...	0.08	16193.0	77.0	1.0	66.0	417.0	13.89	63.0	67.0	14.4
Netherlands	30.0	4.9	6.1	0.0	73.0	0.45	74.0	0.9	27888.0	77961.0	...	2.40	47590.0	90.0	2.0	76.0	519.0	15.44	75.0	92.0	18.7
New Zealand	11.0	2.2	10.3	0.2	74.0	13.87	73.0	1.2	23815.0	28290.0	...	0.75	35609.0	94.0	2.4	90.0	509.0	14.87	77.0	89.0	18.1
Norway	16.0	3.3	8.1	0.3	82.0	2.82	75.0	0.6	33492.0	8797.0	...	0.32	50282.0	94.0	2.0	76.0	496.0	15.56	78.0	94.0	17.9
OECD - Total	20.0	3.9	7.3	2.4	75.0	12.51	65.0	4.0	25908.0	67139.0	...	2.79	36118.0	88.0	1.8	68.0	497.0	14.97	68.0	81.0	17.7
Poland	33.0	1.4	10.8	3.2	90.0	7.41	60.0	0.9	17852.0	10919.0	...	3.77	22655.0	91.0	1.1	58.0	521.0	14.20	55.0	79.0	18.4
Portugal	18.0	5.7	6.5	0.9	38.0	9.62	61.0	1.1	20086.0	31245.0	...	9.11	23688.0	86.0	1.6	46.0	488.0	14.95	58.0	86.0	17.6
Russia	15.0	3.8	2.5	15.1	94.0	0.16	69.0	12.8	19292.0	3412.0	...	1.70	20885.0	90.0	0.9	37.0	481.0	14.97	65.0	56.0	16.0
Slovak Republic	13.0	3.0	6.6	0.6	92.0	7.02	60.0	1.2	17503.0	8663.0	...	9.46	20307.0	90.0	1.1	66.0	472.0	14.99	59.0	81.0	16.3
Slovenia	26.0	3.9	10.3	0.5	85.0	5.63	63.0	0.4	19326.0	18465.0	...	5.15	32037.0	90.0	1.5	65.0	499.0	14.62	52.0	88.0	18.4
Spain	24.0	4.2	7.3	0.1	55.0	5.89	56.0	0.6	22477.0	24774.0	...	12.96	34824.0	95.0	1.9	72.0	490.0	16.06	69.0	71.0	17.6
Sweden	10.0	5.1	10.9	0.0	88.0	1.13	74.0	0.7	29185.0	60328.0	...	1.37	40818.0	92.0	1.7	81.0	482.0	15.11	86.0	95.0	19.3
Switzerland	20.0	4.2	8.4	0.0	86.0	6.72	80.0	0.5	33491.0	108823.0	...	1.46	54236.0	96.0	1.8	81.0	518.0	14.98	49.0	96.0	17.3
Turkey	35.0	5.0	5.5	12.7	34.0	40.86	50.0	1.2	14095.0	3251.0	...	2.37	16919.0	86.0	1.1	68.0	462.0	13.42	88.0	62.0	16.4
United Kingdom	13.0	1.9	11.5	0.2	78.0	12.70	71.0	0.3	27029.0	60778.0	...	2.77	41192.0	91.0	1.9	74.0	502.0	14.83	66.0	88.0	16.4
United States	18.0	1.5	8.3	0.1	89.0	11.30	67.0	5.2	41355.0	145769.0	...	1.91	56340.0	90.0	2.4	88.0	492.0	14.27	68.0	85.0	17.2

	GDP per capita	Life satisfaction
Country
Brazil	8669.998	7.0
Mexico	9009.280	6.7
Russia	9054.914	6.0
Turkey	9437.372	5.6
Hungary	12239.894	4.9
Poland	12495.334	5.8
Chile	13340.905	6.7
Slovak Republic	15991.736	6.1
Czech Republic	17256.918	6.5
Estonia	17288.083	5.6
Greece	18064.288	4.8
Portugal	19121.592	5.1
Slovenia	20732.482	5.7
Spain	25864.721	6.5
Korea	27195.197	5.8
Italy	29866.581	6.0
Japan	32485.545	5.9
Israel	35343.336	7.4
New Zealand	37044.891	7.3
France	37675.006	6.5
Belgium	40106.632	6.9
Germany	40996.511	7.0
Finland	41973.988	7.4
Canada	43331.961	7.3
Netherlands	43603.115	7.3
Austria	43724.031	6.9
United Kingdom	43770.688	6.8
Sweden	49866.266	7.2
Iceland	50854.583	7.5
Australia	50961.865	7.3
Ireland	51350.744	7.0
Denmark	52114.165	7.5
United States	55805.204	7.2
Norway	74822.106	7.4
Switzerland	80675.308	7.5
Luxembourg	101994.093	6.9

	Subject Descriptor	Units	Scale	Country/Series-specific Notes	GDP per capita	Estimates Start After
Country
Botswana	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	6040.957	2008.0
Kuwait	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	29363.027	2014.0
Malawi	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	354.275	2011.0
New Zealand	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	37044.891	2015.0
Norway	Gross domestic product per capita, current prices	U.S. dollars	Units	See notes for: Gross domestic product, curren...	74822.106	2015.0