7. 改变DataFrame运算的方向(axis参数)
In[62]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
college_ugds_ = college.filter(like='UGDS_')
college_ugds_.head()
Out[62]:
# count()返回非缺失值的个数
In[63]: college_ugds_.count()
Out[63]: UGDS_WHITE 6874
UGDS_BLACK 6874
UGDS_HISP 6874
UGDS_ASIAN 6874
...
UGDS_NHPI 6874
UGDS_2MOR 6874
UGDS_NRA 6874
UGDS_UNKN 6874
Length: 9, dtype: int64
# axis参数默认为0,因此显式传入axis=0的结果与上面相同
In[64]: college_ugds_.count(axis=0)
Out[64]: UGDS_WHITE 6874
UGDS_BLACK 6874
UGDS_HISP 6874
UGDS_ASIAN 6874
...
UGDS_NHPI 6874
UGDS_2MOR 6874
UGDS_NRA 6874
UGDS_UNKN 6874
Length: 9, dtype: int64
# axis=0等价于axis='index'
In[65]: college_ugds_.count(axis='index')
Out[65]: UGDS_WHITE 6874
UGDS_BLACK 6874
UGDS_HISP 6874
UGDS_ASIAN 6874
...
UGDS_NHPI 6874
UGDS_2MOR 6874
UGDS_NRA 6874
UGDS_UNKN 6874
Length: 9, dtype: int64
# axis='columns'(等价于axis=1)沿列的方向运算,即统计每行的非缺失值个数
In[66]: college_ugds_.count(axis='columns').head()
Out[66]: INSTNM
Alabama A & M University 9
University of Alabama at Birmingham 9
Amridge University 9
University of Alabama in Huntsville 9
Alabama State University 9
dtype: int64
# 除了统计每行的非缺失值个数,也可以对每行求和加以确认:每行各种族比例之和应约等于1
In[67]: college_ugds_.sum(axis='columns').head()
Out[67]: INSTNM
Alabama A & M University 1.0000
University of Alabama at Birmingham 0.9999
Amridge University 1.0000
University of Alabama in Huntsville 1.0000
Alabama State University 1.0000
dtype: float64
# 用中位数了解每列的分布
In[68]: college_ugds_.median(axis='index')
Out[68]: UGDS_WHITE 0.55570
UGDS_BLACK 0.10005
UGDS_HISP 0.07140
UGDS_ASIAN 0.01290
...
UGDS_NHPI 0.00000
UGDS_2MOR 0.01750
UGDS_NRA 0.00000
UGDS_UNKN 0.01430
Length: 9, dtype: float64
更多
# 沿行的方向(axis=1)使用累积求和cumsum(),可以很容易看到每所学校白人、黑人、西班牙裔三者的合计比例(即UGDS_HISP一列的累积值)
In[69]: college_ugds_cumsum = college_ugds_.cumsum(axis=1)
college_ugds_cumsum.head()
Out[69]:
# 按UGDS_HISP一列降序排列(该列为累积值,即白人、黑人、西班牙裔的合计比例)
In[70]: college_ugds_cumsum.sort_values('UGDS_HISP', ascending=False)
Out[70]: