데이터시각화 중간고사 연습문제
작성 완료
import pandas as pd
import numpy as np
from plotnine import *
import matplotlib.pyplot as plt
import seaborn as sns
- 흠, 20분 걸렸는데... 조금 느리다.
# Seed NumPy's global RNG so the two samples below are reproducible.
np.random.seed(202016248)

# Two normal samples of 10,000 draws each: mean 0 and mean 1, both with
# standard deviation 1. x1 is drawn first so the random stream is unchanged.
x1 = np.random.normal(loc=0, scale=1, size=10000)
x2 = np.random.normal(loc=1, scale=1, size=10000)

# Draw both samples as histograms on the same axes.
sns.histplot(data=[x1, x2])
# Four points plotted in every panel of a 2x2 grid, each panel with a
# different matplotlib format string.
x = [1, 2, 3, 4]
y = [1, 2, 4, 3]

fig, axs = plt.subplots(nrows=2, ncols=2)
(ax1, ax2), (ax3, ax4) = axs

# Pair each panel (row-major order) with its format string.
for _ax, _fmt in zip((ax1, ax2, ax3, ax4), ('o:r', 'Xb', 'xm', '.--k')):
    _ax.plot(x, y, _fmt)

# Bare expression: shows the figure when run as a notebook cell.
fig
(1)
하니, 홍두깨
(2)
하니, 나애리
# Read the employee CSV from the Pandas-Cookbook GitHub repository.
df = pd.read_csv(
    'https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/employee.csv'
)
df

# Inspect missing values before cleaning: dtype summary, per-column
# counts of missing values, and the overall total.
df.info()
df.isna().sum()
df.isna().sum().sum()

# Drop every row that has at least one missing value.
df = df.dropna(axis=0, how='any')

# Repeat the same inspection on the cleaned frame.
df.info()
df.isna().sum()
df.isna().sum().sum()
# Mean BASE_SALARY per GENDER.
# The aggregation is named by string: recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is passed to agg.
df_ = df.groupby(by='GENDER').agg({'BASE_SALARY': 'mean'})
df_

# Reshape to long form; after reset_index the stacked values sit in the
# column labelled 0 (an integer label, not a string).
df_ = df_.stack().reset_index()
df_

# Is the mean salary of "Female" greater than that of "Male"?
# Each side is a list (one element per matching row), compared as lists.
female_mean = df_.query('GENDER == "Female"')[0].to_list()
male_mean = df_.query('GENDER == "Male"')[0].to_list()
female_mean > male_mean
- 남자의 평균 급여가 더 크다.
# Mean, minimum and maximum BASE_SALARY for every (RACE, GENDER) pair.
# Aggregations are named by string: recent pandas releases emit a
# FutureWarning when np.mean or the builtins min/max are passed to agg.
# The resulting column labels ('mean', 'min', 'max') are unchanged.
df2 = df.groupby(by=['RACE', 'GENDER']).agg(
    {'BASE_SALARY': ['mean', 'min', 'max']}
)
df2
- tidy data 형태로 만들자!
# Move the aggregation level of the columns into the row index
# (wide -> long).
df2_long = df2.stack()
df2_long

# Flatten the index into ordinary columns. The stacked level has no name,
# so reset_index labels it 'level_2'; rename it to 'aggtype'.
df3 = df2_long.reset_index()
df3 = df3.rename(columns={'level_2': 'aggtype'})
df3
# Dodged bars of BASE_SALARY per aggregation type, coloured by GENDER,
# with flipped axes and one facet per RACE.
(
    ggplot(data=df3)
    + geom_bar(
        aes(x='aggtype', y='BASE_SALARY', fill='GENDER'),
        stat='identity',
        position='dodge',
    )
    + coord_flip()
    + facet_wrap(facets='RACE')
)