8. 找到最常见的最大值
# 读取college,过滤出只包含本科生种族比例信息的列
In[90]: pd.set_option('display.max_rows', 40)  # cap the number of rows pandas prints
In[91]: college = pd.read_csv(
    'data/college.csv',
    index_col='INSTNM',  # use the INSTNM column as the row index
)
# Keep only the columns whose label contains the substring 'UGDS_'.
college_ugds = college.filter(like='UGDS_', axis='columns')
college_ugds.head(5)
Out[91]:
# 用idxmax方法选出每行种族比例最高的列名
In[92]: highest_percentage_race = college_ugds.idxmax(axis=1)  # axis=1: label of the largest column, per row
highest_percentage_race.head(5)
Out[92]: INSTNM
Alabama A & M University UGDS_BLACK
University of Alabama at Birmingham UGDS_WHITE
Amridge University UGDS_BLACK
University of Alabama in Huntsville UGDS_WHITE
Alabama State University UGDS_BLACK
dtype: object
# 用value_counts,查看最大值的分布
In[93]: highest_percentage_race.value_counts(normalize=True)  # normalize=True: proportions instead of raw counts
Out[93]: UGDS_WHITE 0.670352
UGDS_BLACK 0.151586
UGDS_HISP 0.129473
UGDS_UNKN 0.023422
UGDS_ASIAN 0.012074
UGDS_AIAN 0.006110
UGDS_NRA 0.004073
UGDS_NHPI 0.001746
UGDS_2MOR 0.001164
dtype: float64
更多
# 对于黑人比例最高的学校,排名第二的种族的分布情况
In[94]: college_black = (
    college_ugds
    .loc[highest_percentage_race == 'UGDS_BLACK']  # rows whose top column is UGDS_BLACK
    .drop(columns='UGDS_BLACK')                    # remove it so idxmax finds the runner-up
)
college_black.idxmax(axis=1).value_counts(normalize=True)
Out[94]: UGDS_WHITE 0.661228
UGDS_HISP 0.230326
UGDS_UNKN 0.071977
UGDS_NRA 0.018234
UGDS_ASIAN 0.009597
UGDS_2MOR 0.006718
UGDS_AIAN 0.000960
UGDS_NHPI 0.000960
dtype: float64