6. Seaborn和Pandas的不同点
# 读取employee数据集
In[74]: employee = pd.read_csv('data/employee.csv',
parse_dates=['HIRE_DATE', 'JOB_DATE'])
employee.head()
Out[74]:
# 用seaborn画出每个部门员工数量的柱状图
In[75]: import seaborn as sns
In[76]: sns.countplot(y='DEPARTMENT', data=employee)
Out[76]: <matplotlib.axes._subplots.AxesSubplot at 0x11e287128>
# 要是用pandas来做,需要先聚合数据
In[77]: employee['DEPARTMENT'].value_counts().plot('barh')
Out[77]: <matplotlib.axes._subplots.AxesSubplot at 0x11e30a240>
# 用seaborn找到每个种族的平均工资
In[78]: ax = sns.barplot(x='RACE', y='BASE_SALARY', data=employee)
ax.figure.set_size_inches(16, 4)
Out[78]:
# 用pandas来做,需要先按照race分组
In[79]: avg_sal = employee.groupby('RACE', sort=False)['BASE_SALARY'].mean()
ax = avg_sal.plot(kind='bar', rot=0, figsize=(16,4), width=.8)
ax.set_xlim(-.5, 5.5)
ax.set_ylabel('Mean Salary')
Out[79]: Text(0,0.5,'Mean Salary')
# seaborn还支持在分组内使用第三个参数
In[80]: ax = sns.barplot(x='RACE', y='BASE_SALARY', hue='GENDER',
data=employee, palette='Greys')
ax.figure.set_size_inches(16,4)
Out[80]:
# pandas则要对race和gender同时分组,并对gender做unstack
In[81]: employee.groupby(['RACE', 'GENDER'], sort=False)['BASE_SALARY'] \
.mean().unstack('GENDER') \
.plot(kind='bar', figsize=(16,4), rot=0,
width=.8, cmap='Greys')
Out[81]: <matplotlib.axes._subplots.AxesSubplot at 0x11ecf45c0>
# 用seaborn画race和gender的盒图
In[82]: ax = sns.boxplot(x='GENDER', y='BASE_SALARY', data=employee, hue='RACE', palette='Greys')
ax.figure.set_size_inches(14,4)
Out[82]:
# pandas则要为gender创建两个独立的Axes,然后根据race画盒图
In[83]: fig, ax_array = plt.subplots(1, 2, figsize=(14,4), sharey=True)
for g, ax in zip(['Female', 'Male'], ax_array):
employee.query('GENDER== @g') \
.boxplot(by='RACE', column='BASE_SALARY', ax=ax, rot=20)
ax.set_title(g + ' Salary')
ax.set_xlabel('')
fig.suptitle('')
/Users/Ted/anaconda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:57: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
return getattr(obj, method)(*args, **kwds)
Out[83]: Text(0.5,0.98,'')
# pandas也可以给by参数传入列表,同时按多个变量分组,但是画出的图不够优雅
In[84]: ax = employee.boxplot(by=['GENDER', 'RACE'],
column='BASE_SALARY',
figsize=(16,4), rot=15)
ax.figure.suptitle('')
/Users/Ted/anaconda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:57: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
return getattr(obj, method)(*args, **kwds)
Out[84]: Text(0.5,0.98,'')