8. 确定大学校园多样性
# US News给出的美国10所最具多样性的大学
In[71]: pd.read_csv(
    'data/college_diversity.csv',
    index_col='School',  # the School column becomes the row index
)
Out[71]:
In[72]: college = pd.read_csv(
    'data/college.csv',
    index_col='INSTNM',  # the INSTNM column becomes the row index
)
# Keep only the columns whose label contains the substring 'UGDS_'.
college_ugds_ = college.filter(like='UGDS_', axis='columns')
college_ugds_.head(5)
Out[72]:
In[73]: (
    college_ugds_
    .isna()                        # True wherever a value is missing
    .sum(axis='columns')           # number of missing values in each row
    .sort_values(ascending=False)  # rows with the most missing values first
    .head()
)
Out[73]: INSTNM
Excel Learning Center-San Antonio South 9
Philadelphia College of Osteopathic Medicine 9
Assemblies of God Theological Seminary 9
Episcopal Divinity School 9
Phillips Graduate Institute 9
dtype: int64
# 如果某一行的所有列都是缺失值,则将该行去除
In[74]: college_ugds_ = college_ugds_.dropna(axis='index', how='all')  # drop rows in which every value is missing
In[75]: college_ugds_.isna().sum(axis='index')  # missing values remaining in each column
Out[75]: UGDS_WHITE 0
UGDS_BLACK 0
UGDS_HISP 0
UGDS_ASIAN 0
..
UGDS_NHPI 0
UGDS_2MOR 0
UGDS_NRA 0
UGDS_UNKN 0
Length: 9, dtype: int64
# 用大于或等于方法ge(),将DataFrame变为布尔值矩阵
In[76]: college_ugds_.ge(0.15).head(n=5)  # element-wise >= 0.15, first five rows
Out[76]:
# 按行对True值求和,得到每所学校中比例不低于15%的种族数量
In[77]: diversity_metric = (
    college_ugds_
    .ge(0.15)     # True where the value is at least 0.15
    .sum(axis=1)  # True counts as 1, so this is the number of True values per row
)
diversity_metric.head(n=5)
Out[77]: INSTNM
Alabama A & M University 1
University of Alabama at Birmingham 2
Amridge University 3
University of Alabama in Huntsville 1
Alabama State University 1
dtype: int64
# 使用value_counts(),查看分布情况
In[78]: diversity_metric.value_counts(sort=True, ascending=False)  # most frequent value first
Out[78]: 1 3042
2 2884
3 876
4 63
0 7
5 2
dtype: int64
# 查看哪些学校中比例不低于15%的种族数量最多
In[79]: (
    diversity_metric
    .sort_values(ascending=False)  # highest counts first
    .head(n=5)
)
Out[79]: INSTNM
Regency Beauty Institute-Austin 5
Central Texas Beauty College-Temple 5
Sullivan and Cogliano Training Center 4
Ambria College of Nursing 4
Berkeley College-New York 4
dtype: int64
# 用loc()方法查看对应行索引的行
In[80]: college_ugds_.loc[
    [
        'Regency Beauty Institute-Austin',
        'Central Texas Beauty College-Temple',
    ],
    :,  # keep every column
]
Out[80]:
# 查看US News前五所最具多样性的大学在diversity_metric中的情况
In[81]: us_news_top = [
    'Rutgers University-Newark',
    'Andrews University',
    'Stanford University',
    'University of Houston',
    'University of Nevada-Las Vegas',
]
In[82]: diversity_metric.loc[us_news_top]
Out[82]: INSTNM
Rutgers University-Newark 4
Andrews University 3
Stanford University 3
University of Houston 3
University of Nevada-Las Vegas 3
dtype: int64
更多
# 可以用每所学校中占比最大的种族的比例,查看哪些学校最不具有多样性
In[83]: (
    college_ugds_
    .max(axis='columns')           # largest single value in each row
    .sort_values(ascending=False)  # largest values first
    .head(n=10)
)
Out[83]: INSTNM
Dewey University-Manati 1.0
Yeshiva and Kollel Harbotzas Torah 1.0
Mr Leon's School of Hair Design-Lewiston 1.0
Dewey University-Bayamon 1.0
...
Monteclaro Escuela de Hoteleria y Artes Culinarias 1.0
Yeshiva Shaar Hatorah 1.0
Bais Medrash Elyon 1.0
Yeshiva of Nitra Rabbinical College 1.0
Length: 10, dtype: float64
# 查看Talmudical Seminary Oholei Torah这所神学院各种族的比例
In[84]: college_ugds_.loc['Talmudical Seminary Oholei Torah', :]  # one row, every column
Out[84]: UGDS_WHITE 1.0
UGDS_BLACK 0.0
UGDS_HISP 0.0
UGDS_ASIAN 0.0
...
UGDS_NHPI 0.0
UGDS_2MOR 0.0
UGDS_NRA 0.0
UGDS_UNKN 0.0
Name: Talmudical Seminary Oholei Torah, Length: 9, dtype: float64
# 查看是否有学校九个种族的比例都超过了1%
In[85]: (
    college_ugds_
    .gt(0.01)             # True where the value is strictly greater than 0.01
    .all(axis='columns')  # True for rows where every column passes
    .any()                # True if at least one such row exists
)
Out[85]: True