8. 找到最常见的最大值

  1. # 读取 college 数据集,并用 filter(like='UGDS_') 只保留本科生各种族比例的列
  2. In[90]: pd.options.display.max_rows= 40
  3. In[91]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
  4. college_ugds = college.filter(like='UGDS_')
  5. college_ugds.head()
  6. Out[91]:

8. 找到最常见的最大值 - 图1

  1. # 用idxmax方法选出每行种族比例最高的列名
  2. In[92]: highest_percentage_race = college_ugds.idxmax(axis='columns')
  3. highest_percentage_race.head()
  4. Out[92]: INSTNM
  5. Alabama A & M University UGDS_BLACK
  6. University of Alabama at Birmingham UGDS_WHITE
  7. Amridge University UGDS_BLACK
  8. University of Alabama in Huntsville UGDS_WHITE
  9. Alabama State University UGDS_BLACK
  10. dtype: object

  1. # 用value_counts,查看最大值的分布
  2. In[93]: highest_percentage_race.value_counts(normalize=True)
  3. Out[93]: UGDS_WHITE 0.670352
  4. UGDS_BLACK 0.151586
  5. UGDS_HISP 0.129473
  6. UGDS_UNKN 0.023422
  7. UGDS_ASIAN 0.012074
  8. UGDS_AIAN 0.006110
  9. UGDS_NRA 0.004073
  10. UGDS_NHPI 0.001746
  11. UGDS_2MOR 0.001164
  12. dtype: float64

更多

  1. # 对于黑人比例最高的学校,排名第二的种族的分布情况
  2. In[94]: college_black = college_ugds[highest_percentage_race == 'UGDS_BLACK']
  3. college_black = college_black.drop('UGDS_BLACK', axis='columns')
  4. college_black.idxmax(axis='columns').value_counts(normalize=True)
  5. Out[94]: UGDS_WHITE 0.670352
  6. UGDS_BLACK 0.151586
  7. UGDS_HISP 0.129473
  8. UGDS_UNKN 0.023422
  9. UGDS_ASIAN 0.012074
  10. UGDS_AIAN 0.006110
  11. UGDS_NRA 0.004073
  12. UGDS_NHPI 0.001746
  13. UGDS_2MOR 0.001164
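  14. dtype: float64

注意:上面 Out[94] 的内容与 Out[93] 逐行完全相同,应为排版时误贴的输出。In[94] 已先用 drop 删除了 UGDS_BLACK 列,因此真实结果中不可能出现 UGDS_BLACK 这一项;正确的分布请以实际运行 In[94] 得到的结果为准。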