第07章分组聚合、过滤、转换 - 8. 减肥对赌 - 《Pandas Cookbook 带注释源码》

8. 减肥对赌

#  读取减肥数据集，查看一月的数据
 In[63]: weight_loss = pd.read_csv('data/weight_loss.csv')
         weight_loss.query('Month == "Jan"')
Out[63]:

#  定义一个求减肥比例的函数
 In[64]: def find_perc_loss(s):
            # Fractional change of every value in s relative to its first value
            # (s.iloc[0]); results below zero mean the value dropped from the start.
            return (s - s.iloc[0]) / s.iloc[0]
#  查看Bob在一月的减肥成果
 In[65]: bob_jan = weight_loss.query('Name=="Bob" and Month=="Jan"')
         find_perc_loss(bob_jan['Weight'])
Out[65]: 0    0.000000
         2   -0.010309
         4   -0.027491
         6   -0.027491
         Name: Weight, dtype: float64

#  对Name和Month进行分组，然后使用transform方法，传入函数，对数值进行转换
 In[66]: pcnt_loss = weight_loss.groupby(['Name', 'Month'])['Weight'].transform(find_perc_loss)
         pcnt_loss.head(8)
Out[66]: 0    0.000000
         1    0.000000
         2   -0.010309
         3   -0.040609
         4   -0.027491
         5   -0.040609
         6   -0.027491
         7   -0.035533
         Name: Weight, dtype: float64

#  transform之后的结果，行数不变，可以赋值给原始DataFrame作为一个新列；
#  为了缩短输出，只选择Bob的前两个月数据
 In[67]: weight_loss['Perc Weight Loss'] = pcnt_loss.round(3)
         weight_loss.query('Name=="Bob" and Month in ["Jan", "Feb"]')
Out[67]:

#  因为最重要的是每个月的第4周，只选择第4周的数据
 In[68]: week4 = weight_loss.query('Week == "Week 4"')
         week4
Out[68]:

#  用pivot重构DataFrame，让Amy和Bob的数据并排放置
 In[69]: winner = week4.pivot(index='Month', columns='Name', values='Perc Weight Loss')
         winner
Out[69]:

#  用NumPy的where函数选出每月的赢家：Perc Weight Loss数值更小（即减重比例更大）的一方获胜
 In[70]: winner['Winner'] = np.where(winner['Amy'] < winner['Bob'], 'Amy', 'Bob')
         winner.style.highlight_min(axis=1)
Out[70]:

#  用value_counts()返回最后的比分
 In[71]: winner.Winner.value_counts()
Out[71]: Amy    3
         Bob    1
         Name: Winner, dtype: int64

#  pivot结果中的月份默认按字母顺序排列；先用unique()取得按出现顺序（即时间顺序）排列的月份
 In[72]: week4a = week4.copy()
         month_chron = week4a['Month'].unique() 
         month_chron
Out[72]: array(['Jan', 'Feb', 'Mar', 'Apr'], dtype=object)

#  将Month列转换为有序的Categorical类型，这样pivot之后月份就会按时间顺序排列
 In[73]: week4a['Month'] = pd.Categorical(week4a['Month'], 
                                          categories=month_chron,
                                          ordered=True)
         week4a.pivot(index='Month', columns='Name', values='Perc Weight Loss')
Out[73]:

8. 减肥对赌

8. 减肥对赌

更多