5. 堆叠面积图,以发现趋势
# 读取meetup_groups数据集
In[66]: meetup = pd.read_csv('data/meetup_groups.csv',
parse_dates=['join_date'],
index_col='join_date')
meetup.head()
Out[66]:
# 算出每周加入每个组的人数
In[67]: group_count = meetup.groupby([pd.Grouper(freq='W'), 'group']).size()
group_count.head()
Out[67]: join_date group
2010-11-07 houstonr 5
2010-11-14 houstonr 11
2010-11-21 houstonr 2
2010-12-05 houstonr 1
2011-01-16 houstonr 2
dtype: int64
# 将数据表unstack
In[68]: gc2 = group_count.unstack('group', fill_value=0)
gc2.tail()
Out[68]:
# 做累积求和
In[69]: group_total = gc2.cumsum()
group_total.tail()
Out[69]:
# 将每行除以该行的总和,以得到各组在总数中所占的百分比
In[70]: row_total = group_total.sum(axis='columns')
group_cum_pct = group_total.div(row_total, axis='index')
group_cum_pct.tail()
Out[70]:
# 画堆叠面积图
In[71]: ax = group_cum_pct.plot(kind='area', figsize=(18,4),
cmap='Greys', xlim=('2013-6', None),
ylim=(0, 1), legend=False)
ax.figure.suptitle('Houston Meetup Groups', size=25)
ax.set_xlabel('')
ax.yaxis.tick_right()
# Settings shared by every label: positions are given as fractions of the
# axes area (xycoords='axes fraction') and all labels use the same font size.
plot_kwargs = dict(xycoords='axes fraction', size=15)
# The label text is passed positionally. The keyword `s` was renamed to
# `text` in Matplotlib 3.3 and later removed, so `s=...` raises TypeError on
# current releases; the positional form works on old and new versions alike.
ax.annotate('R Users', xy=(.1, .7), color='w', **plot_kwargs)
ax.annotate('Data Visualization', xy=(.25, .16), color='k', **plot_kwargs)
ax.annotate('Energy Data Science', xy=(.5, .55), color='k', **plot_kwargs)
ax.annotate('Data Science', xy=(.83, .07), color='k', **plot_kwargs)
ax.annotate('Machine Learning', xy=(.86, .78), color='w', **plot_kwargs)
Out[71]: Text(0.86,0.78,'Machine Learning')
更多
# 用饼图查看每组随时间的分布情况
In[72]: pie_data = group_cum_pct.asfreq('3MS', method='bfill') \
.tail(6).to_period('M').T
pie_data
Out[72]:
In[73]: from matplotlib.cm import Greys
greys = Greys(np.arange(50,250,40))
ax_array = pie_data.plot(kind='pie', subplots=True,
layout=(2,3), labels=None,
autopct='%1.0f%%', pctdistance=1.22,
colors=greys)
# Build one figure-level legend from the wedges of the first subplot,
# labelled with the row index of pie_data and laid out in three columns.
ax1 = ax_array[0, 0]
ax1.figure.legend(ax1.patches, pie_data.index, ncol=3)

# For every subplot, copy the current y-axis label text into the x-axis
# label, make the x-axis label visible, and blank out the y-axis label.
# The loop body must be indented; without it the cell fails with an
# IndentationError.
for ax in ax_array.flatten():
    ax.xaxis.label.set_visible(True)
    ax.set_xlabel(ax.get_ylabel())
    ax.set_ylabel('')

# Applied once for the whole figure: set the vertical spacing between the
# subplot rows.
ax1.figure.subplots_adjust(hspace=.3)
Out[73]: