import numpy as np
import pandas as pd
# Remember the row limit that was active before this session (presumably so
# it can be restored later), then cap Series/DataFrame displays at 20 rows.
PREVIOUS_MAX_ROWS = pd.get_option('display.max_rows')
pd.set_option('display.max_rows', 20)
# Fixed seed so that the random examples below are reproducible.
np.random.seed(12345)

import matplotlib.pyplot as plt
import matplotlib
# Default size (width 10, height 6) for every matplotlib figure created afterwards.
plt.rc('figure', figsize=(10, 6))
# Print NumPy floats with 4 digits of precision and without scientific notation.
np.set_printoptions(precision=4, suppress=True)


# Random walk: running total of 10 standard-normal draws,
# labelled with the index values 0, 10, ..., 90.
s = pd.Series(
    data=np.cumsum(np.random.randn(10)),
    index=np.arange(0, 100, 10),
)

s

0    -0.204708
10    0.274236
20   -0.245203
30   -0.800933
40    1.164847
50    2.558253
60    2.651161
70    2.932907
80    3.701930
90    4.948364
dtype: float64


# Series.plot() with no arguments: pandas' default plot kind is a line plot.
s.plot()

<AxesSubplot:>


# 10x4 standard-normal draws accumulated down the rows (axis 0): one random
# walk per column, with row labels 0, 10, ..., 90.
df = pd.DataFrame(
    data=np.cumsum(np.random.randn(10, 4), axis=0),
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)

df


# DataFrame.plot() with no arguments: every column is drawn as a line on one shared Axes.
df.plot()

<AxesSubplot:>


# Re-seed so this cell always produces the same four random walks.
np.random.seed(12345)
df = pd.DataFrame(
    data=np.cumsum(np.random.randn(10, 4), axis=0),
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)
# subplots=True draws each column on its own Axes; the call returns an array of Axes.
df.plot(subplots=True)

array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
      dtype=object)


# Re-seed, then draw 16 uniform [0, 1) values labelled 'a' through 'p'.
np.random.seed(12345)
data = pd.Series(
    data=np.random.rand(16),
    index=list('abcdefghijklmnop'),
)


data

a    0.929616
b    0.316376
c    0.183919
d    0.204560
e    0.567725
f    0.595545
g    0.964515
h    0.653177
i    0.748907
j    0.653570
k    0.747715
l    0.961307
m    0.008388
n    0.106444
o    0.298704
p    0.656411
dtype: float64


# Figure with two stacked subplots (2 rows x 1 column).
fig, axes = plt.subplots(2, 1)

# Top Axes: vertical bars; bottom Axes: horizontal bars.
# color='k' is black, alpha=0.7 makes the bars partially transparent.
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<AxesSubplot:>


# Figure with two stacked subplots (2 rows x 1 column).
fig, axes = plt.subplots(2, 1)

# Bar charts requested through the kind= option of plot() rather than
# through the plot.bar() / plot.barh() accessor methods.
data.plot(ax=axes[0], kind='bar', color='k', alpha=0.7)
data.plot(ax=axes[1], kind='barh', color='k', alpha=0.7)

<AxesSubplot:>


# Dedicated seed so the 6x4 table of uniform [0, 1) values is reproducible.
np.random.seed(12348)

# Rows are labelled 'one'..'six'; the column index carries the name 'Genus'.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
df


# Grouped vertical bars: one group per row label, one bar per column.
df.plot.bar()

<AxesSubplot:>


# Grouped horizontal bars: one group per row label, one bar per column.
df.plot.barh()

<AxesSubplot:>


# stacked=True stacks each row's column values into a single vertical bar;
# alpha=0.5 makes the bars half transparent.
df.plot.bar(stacked=True, alpha=0.5)

<AxesSubplot:>


# Horizontal stacked bars: each row's column values share one bar.
df.plot.barh(stacked=True, alpha=0.5)

<AxesSubplot:>


# Grouped vertical bars requested through plot(kind='bar') instead of plot.bar().
df.plot(kind='bar', alpha=0.5)

<AxesSubplot:>


# Horizontal stacked bars requested through plot(kind='barh') instead of plot.barh().
df.plot(kind='barh', stacked=True, alpha=0.5)

<AxesSubplot:>


# Remote CSV holding the restaurant tipping data used by the following examples.
tips_path = 'https://raw.githubusercontent.com/codingalzi/pydata/master/notebooks/examples/tips.csv'


# Reads the CSV straight from the URL, so running this cell needs network access.
tips = pd.read_csv(tips_path)

tips


# Distinct values of the 'smoker' column, in order of first appearance.
tips['smoker'].unique()

array(['No', 'Yes'], dtype=object)


# Distinct values of the 'day' column, in order of first appearance.
tips['day'].unique()

array(['Sun', 'Sat', 'Thur', 'Fri'], dtype=object)


# Distinct values of the 'time' column, in order of first appearance.
tips['time'].unique()

array(['Dinner', 'Lunch'], dtype=object)


# Distinct party sizes; bracket access is used because `tips.size` would
# resolve to the DataFrame.size attribute rather than the column.
tips['size'].unique()

array([2, 3, 4, 1, 6, 5], dtype=int64)


# Contingency table: one row per day, one column per party size, cells are counts.
party_counts = pd.crosstab(index=tips['day'], columns=tips['size'])

party_counts


# Exclude 1-person and 6-person parties (label slice 2..5, both ends included)
party_counts = party_counts.loc[:, 2:5]

party_counts


# Normalize so that every row sums to 1
party_pcts = party_counts.div(party_counts.sum(axis=1), axis='index')

party_pcts


# Grouped bars: one group per day, one bar per party size (row-normalized shares).
party_pcts.plot.bar()

<AxesSubplot:xlabel='day'>


import seaborn as sns


# Tip relative to the bill amount *excluding* the tip. Despite the "_pct"
# name this is a fraction (e.g. 0.15), not a percentage.
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

tips.head()


# Horizontal bars of tip_pct aggregated per day (seaborn's default estimator
# is the mean); the line on each bar is seaborn's default error bar.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>


# ci='sd' makes the error bars show the standard deviation of the observations.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of errorbar='sd' --
# confirm the installed seaborn version before changing this.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h', ci='sd')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>


# ci=None turns the error bars off.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of errorbar=None --
# confirm the installed seaborn version before changing this.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h', ci=None)

<AxesSubplot:xlabel='tip_pct', ylabel='day'>


# hue='time' splits each day's bar into one bar per value of the 'time' column.
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>


# Boolean masks: rows recorded on Thursday, and rows recorded at dinner time.
day_thur = tips['day']=='Thur'
time_dinner = tips['time']=='Dinner'


# Rows that satisfy both masks (Thursday AND Dinner).
tips[day_thur & time_dinner]


# Switch seaborn's global theme to "darkgrid"; applies to plots drawn from here on.
sns.set_theme(style="darkgrid")


# Hue-split bar plot again, now rendered with the darkgrid theme.
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>


# Largest tip_pct value in the data (attribute-style column access).
tips.tip_pct.max()

2.452380952380953


# Histogram of tip_pct using 50 bins.
tips['tip_pct'].plot.hist(bins=50)

<AxesSubplot:ylabel='Frequency'>


# Kernel density estimate of tip_pct (pandas relies on SciPy for this plot kind).
tips['tip_pct'].plot.density()

<AxesSubplot:ylabel='Density'>


# Re-seed so the sample below is reproducible.
np.random.seed(12345)

# Bimodal sample: 200 draws with mean 0 / std 1, then 200 draws with
# mean 10 / std 2, concatenated in that order.
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)

values = pd.Series(np.concatenate((comp1, comp2)))

# Axes-level histogram with 100 bins in black; kde=True overlays a density curve.
sns.histplot(values, bins=100, color='k', kde=True)

<AxesSubplot:ylabel='Count'>


# Re-seed so the sample below is reproducible.
np.random.seed(12345)

# Bimodal sample: 200 draws with mean 0 / std 1, then 200 draws with
# mean 10 / std 2, concatenated in that order.
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)

values = pd.Series(np.concatenate((comp1, comp2)))

# Figure-level histogram: displot() creates its own figure and returns a FacetGrid.
sns.displot(values, kde=True, bins=100, color='k')

<seaborn.axisgrid.FacetGrid at 0x21b82f98fa0>


# Re-seed so the sample below is reproducible.
np.random.seed(12345)

# Bimodal sample: 200 draws with mean 0 / std 1, then 200 draws with
# mean 10 / std 2, concatenated in that order.
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)

values = pd.Series(np.concatenate((comp1, comp2)))

# Figure-level histogram with an explicit facet size: height=5 and
# aspect=1.7 (width = height * aspect).
sns.displot(
    values,
    kde=True,
    bins=100,
    color='k',
    height=5,
    aspect=1.7,
)

<seaborn.axisgrid.FacetGrid at 0x21b82faf820>


# Rebinds the name `data` (used earlier for another Series) to the tip_pct column.
data = tips['tip_pct']


# Histogram of tip_pct with an overlaid density curve (kde=True).
sns.histplot(data, kde=True)

<AxesSubplot:xlabel='tip_pct', ylabel='Count'>


import statsmodels.api as sm

# statsmodels' bundled macrodata dataset, loaded as a pandas DataFrame.
macro = sm.datasets.macrodata.load_pandas().data
macro


# Keep only the four indicators that are compared below.
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
data


# Log-differences: change in log(value) between consecutive rows. diff()
# leaves NaN in the first row, which dropna() then removes.
trans_data = np.log(data).diff().dropna()
trans_data


# Scatter plot of the m1 vs. unemp log-changes with a fitted regression line.
sns.regplot(x='m1', y='unemp', data=trans_data)
# plt.title() applies to the current Axes, i.e. the one regplot just drew on.
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')


# Pairwise correlation matrix of the four log-change columns.
correlation = trans_data.corr()
correlation


# Colour-coded view of the correlation matrix.
sns.heatmap(correlation)

<AxesSubplot:>


# fit_reg=False suppresses the regression line, leaving only the scatter plot.
sns.regplot(x='m1', y='unemp', data=trans_data, fit_reg=False)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')


# Scatter-plot matrix of all column pairs; plot_kws passes alpha=0.2 to the
# off-diagonal plots so overlapping points stay visible.
sns.pairplot(trans_data, plot_kws={'alpha': 0.2})

<seaborn.axisgrid.PairGrid at 0x21b8549cb20>


# Scatter-plot matrix with density curves (diag_kind='kde') on the diagonal.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<seaborn.axisgrid.PairGrid at 0x21b85dadfd0>


# Bar plots of tip_pct per day, coloured by 'time', with one facet column per
# 'smoker' value. Only rows with tip_pct < 1 are plotted.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
               kind='bar', data=tips[tips.tip_pct < 1])

<seaborn.axisgrid.FacetGrid at 0x21b872bab50>


# Facet grid: one row per 'time' value and one column per 'smoker' value,
# restricted to rows with tip_pct < 1.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])

<seaborn.axisgrid.FacetGrid at 0x21b8799c850>


# NOTE(review): this call is identical to the one in the preceding cell --
# confirm whether a different variation was intended here.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])

<seaborn.axisgrid.FacetGrid at 0x21b879c4a60>


# One facet row per 'time' value. Unlike the other catplot calls, the full
# `tips` table is passed, so no tip_pct filter is applied.
sns.catplot(x='day', y='tip_pct', row='time', kind='bar', data=tips)

<seaborn.axisgrid.FacetGrid at 0x21b87698760>


# Horizontal box plots of tip_pct per day, limited to rows with tip_pct < 0.5.
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5])

<seaborn.axisgrid.FacetGrid at 0x21b8758fb50>

	A	B	C	D
0	1.007189	-1.296221	0.274992	0.228913
10	2.360106	-0.409792	-1.726646	-0.142930
20	4.029132	-0.848362	-2.266387	0.334055
30	7.278075	-1.869589	-2.843474	0.458177
40	7.580689	-1.345817	-2.842534	1.801986
50	6.867145	-2.176970	-5.212766	-0.058774
60	6.006388	-1.616825	-6.478700	0.061053
70	4.942875	-1.283942	-8.838119	-0.138490
80	3.400880	-2.254678	-10.145149	0.147860
90	3.778864	-3.008565	-9.813864	1.497602

Genus	A	B	C	D
one	0.370670	0.602792	0.229159	0.486744
two	0.420082	0.571653	0.049024	0.880592
three	0.814568	0.277160	0.880316	0.431326
four	0.374020	0.899420	0.460304	0.100843
five	0.433270	0.125107	0.494675	0.961825
six	0.601648	0.478576	0.205690	0.560547

size	2	3	4	5
day
Fri	0.888889	0.055556	0.055556	0.000000
Sat	0.623529	0.211765	0.152941	0.011765
Sun	0.520000	0.200000	0.240000	0.040000
Thur	0.827586	0.068966	0.086207	0.017241

	year	quarter	realgdp	realcons	realinv	realgovt	realdpi	cpi	m1	tbilrate	unemp	pop	infl	realint
0	1959.0	1.0	2710.349	1707.4	286.898	470.045	1886.9	28.980	139.7	2.82	5.8	177.146	0.00	0.00
1	1959.0	2.0	2778.801	1733.7	310.859	481.301	1919.7	29.150	141.7	3.08	5.1	177.830	2.34	0.74
2	1959.0	3.0	2775.488	1751.8	289.226	491.260	1916.4	29.350	140.5	3.82	5.3	178.657	2.74	1.09
3	1959.0	4.0	2785.204	1753.7	299.356	484.052	1931.3	29.370	140.0	4.33	5.6	179.386	0.27	4.06
4	1960.0	1.0	2847.699	1770.5	331.722	462.199	1955.5	29.540	139.6	3.50	5.2	180.007	2.31	1.19
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
198	2008.0	3.0	13324.600	9267.7	1990.693	991.551	9838.3	216.889	1474.7	1.17	6.0	305.270	-3.16	4.33
199	2008.0	4.0	13141.920	9195.3	1857.661	1007.273	9920.4	212.174	1576.5	0.12	6.9	305.952	-8.79	8.91
200	2009.0	1.0	12925.410	9209.2	1558.494	996.287	9926.4	212.671	1592.8	0.22	8.1	306.547	0.94	-0.71
201	2009.0	2.0	12901.504	9189.0	1456.678	1023.528	10077.5	214.469	1653.6	0.18	9.2	307.226	3.37	-3.19
202	2009.0	3.0	12990.341	9256.0	1486.398	1044.088	10040.6	216.385	1673.9	0.12	9.6	308.013	3.56	-3.44

지표	설명
year	1959년 1분기 - 2009년 3분기
quarter	1 - 4분기
realgdp	실질 국내총생산 (단위: 억 달러)
realcons	실질 총민간 소비지출 (단위: 억 달러)
realinv	실질 총민간 국내투자 (단위: 억 달러)
realgovt	실질 연방 정부 소비지출 및 국내투자 (단위: 억 달러)
realdpi	실질 가처분 소득 (단위: 억 달러)
cpi	소비자 물가지수
m1	M1 통화지표
tbilrate	3개월 만기 국채 수익률
unemp	실업률
pop	인구
infl	물가상승률
realint	실질 이자율

그래프와 시각화 3편¶

pandas로 그래프 그리기¶

시리즈와 선그래프¶

데이터프레임과 선그래프¶

시리즈와 막대그래프¶

plot() 메서드의 `kind=bar`와 `kind=barh` 옵션¶

데이터프레임과 막대그래프¶

누적막대그래프¶

plot() 메서드의 `kind=bar`와 `kind=barh` 옵션¶

예제: 서비스 팁(tip) 데이터¶

교차 테이블¶

행별 정규화(Normalization)¶

seaborn으로 그래프 그리기¶

`seaborn.barplot()` 함수¶

seaborn 기본 설정¶

히스토그램과 밀도그래프¶

`seaborn.histplot()` 함수¶

`seaborn.displot()` 함수¶

예제: 서비스 팁 비율의 히스토그램과 밀도 그래프¶

산점도¶

산점도와 선형회귀 곡선¶

산점도 행렬¶

패싯 그리드(Facet Grids)와 범주형 데이터¶

기타 시각화 도구 안내¶

	total_bill	tip	smoker	day	time	size
0	16.99	1.01	No	Sun	Dinner	2
1	10.34	1.66	No	Sun	Dinner	3
2	21.01	3.50	No	Sun	Dinner	3
3	23.68	3.31	No	Sun	Dinner	2
4	24.59	3.61	No	Sun	Dinner	4
...	...	...	...	...	...	...
239	29.03	5.92	No	Sat	Dinner	3
240	27.18	2.00	Yes	Sat	Dinner	2
241	22.67	2.00	Yes	Sat	Dinner	2
242	17.82	1.75	No	Sat	Dinner	2
243	18.78	3.00	No	Thur	Dinner	2

	cpi	m1	tbilrate	unemp
1	0.005849	0.014215	0.088193	-0.128617
2	0.006838	-0.008505	0.215321	0.038466
3	0.000681	-0.003565	0.125317	0.055060
4	0.005772	-0.002861	-0.212805	-0.074108
5	0.000338	0.004289	-0.266946	0.000000
...	...	...	...	...
198	-0.007904	0.045361	-0.396881	0.105361
199	-0.021979	0.066753	-2.277267	0.139762
200	0.002340	0.010286	0.606136	0.160343
201	0.008419	0.037461	-0.200671	0.127339
202	0.008894	0.012202	-0.405465	0.042560

	cpi	m1	tbilrate	unemp
cpi	1.000000	-0.050535	0.329456	0.019750
m1	-0.050535	1.000000	-0.360278	0.150729
tbilrate	0.329456	-0.360278	1.000000	-0.371491
unemp	0.019750	0.150729	-0.371491	1.000000

그래프와 시각화 3편¶

pandas로 그래프 그리기¶

시리즈와 선그래프¶

데이터프레임과 선그래프¶

시리즈와 막대그래프¶

plot() 메서드의 kind=bar와 kind=barh 옵션¶

데이터프레임과 막대그래프¶

누적막대그래프¶

plot() 메서드의 kind=bar와 kind=barh 옵션¶

예제: 서비스 팁(tip) 데이터¶

교차 테이블¶

행별 정규화(Normalization)¶

seaborn으로 그래프 그리기¶

seaborn.barplot() 함수¶

seaborn 기본 설정¶

히스토그램과 밀도그래프¶

seaborn.histplot() 함수¶

seaborn.displot() 함수¶

예제: 서비스 팁 비율의 히스토그램과 밀도 그래프¶

산점도¶

산점도와 선형회귀 곡선¶

산점도 행렬¶

패싯 그리드(Facet Grids)와 범주형 데이터¶

기타 시각화 도구 안내¶

plot() 메서드의 `kind=bar`와 `kind=barh` 옵션¶

plot() 메서드의 `kind=bar`와 `kind=barh` 옵션¶

`seaborn.barplot()` 함수¶

`seaborn.histplot()` 함수¶

`seaborn.displot()` 함수¶