1. 检查索引
# 读取college数据集,取出其列索引对象(columns)
In[2]: college = pd.read_csv('data/college.csv')
columns = college.columns
columns
Out[2]: Index(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY', 'RELAFFIL',
'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS', 'UGDS_WHITE',
'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN', 'UGDS_NHPI',
'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF', 'CURROPER', 'PCTPELL',
'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10', 'GRAD_DEBT_MDN_SUPP'], dtype='object')
# 用values属性,访问底层的NumPy数组
In[3]: columns.values
Out[3]: array(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY',
'RELAFFIL', 'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS',
'UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN',
'UGDS_NHPI', 'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF',
'CURROPER', 'PCTPELL', 'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10',
'GRAD_DEBT_MDN_SUPP'], dtype=object)
# 用整数下标取出索引对象的第6个值(下标从0开始,所以是5)
In[4]: columns[5]
Out[4]: 'WOMENONLY'
# 用整数列表一次取出第2、9、11个值,返回的仍是索引对象
In[5]: columns[[1,8,10]]
Out[5]: Index(['CITY', 'SATMTMID', 'UGDS'], dtype='object')
# 用负数下标切片,选取倒数第7个到倒数第5个值(顺序不变,并非逆序)
In[6]: columns[-7:-4]
Out[6]: Index(['PPTUG_EF', 'CURROPER', 'PCTPELL'], dtype='object')
# 索引对象有许多与Series、DataFrame相同的方法
In[7]: columns.min(), columns.max(), columns.isnull().sum()
Out[7]: ('CITY', 'WOMENONLY', 0)
# 索引对象可以直接与字符串做加法运算,返回一个新的索引对象(原索引不会被修改)
In[8]: columns + '_A'
Out[8]: Index(['INSTNM_A', 'CITY_A', 'STABBR_A', 'HBCU_A', 'MENONLY_A', 'WOMENONLY_A',
'RELAFFIL_A', 'SATVRMID_A', 'SATMTMID_A', 'DISTANCEONLY_A', 'UGDS_A',
'UGDS_WHITE_A', 'UGDS_BLACK_A', 'UGDS_HISP_A', 'UGDS_ASIAN_A',
'UGDS_AIAN_A', 'UGDS_NHPI_A', 'UGDS_2MOR_A', 'UGDS_NRA_A',
'UGDS_UNKN_A', 'PPTUG_EF_A', 'CURROPER_A', 'PCTPELL_A', 'PCTFLOAN_A',
'UG25ABV_A', 'MD_EARN_WNE_P10_A', 'GRAD_DEBT_MDN_SUPP_A'],
dtype='object')
# 索引对象也可以使用比较运算符,得到一个布尔值的NumPy数组
In[9]: columns > 'G'
Out[9]: array([ True, False, True, True, True, True, True, True, True,
False, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True], dtype=bool)
# 尝试用赋值的方法,修改索引对象的一个值,会导致类型错误,因为索引对象是不可变类型
In[10]: columns[1] = 'city'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-1e9e8e3125de> in <module>()
----> 1 columns[1] = 'city'
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in __setitem__(self, key, value)
1668
1669 def __setitem__(self, key, value):
-> 1670 raise TypeError("Index does not support mutable operations")
1671
1672 def __getitem__(self, key):
TypeError: Index does not support mutable operations
更多
索引对象支持集合运算:并集(union)、交集(intersection)、差集(difference)、对称差集(symmetric_difference)
# 用切片取出两个有重叠元素的索引对象,用于演示集合运算
In[11]: c1 = columns[:4]
c1
Out[11]: Index(['INSTNM', 'CITY', 'STABBR', 'HBCU'], dtype='object')
In[12]: c2 = columns[2:5]
c2
Out[12]: Index(['STABBR', 'HBCU', 'MENONLY'], dtype='object')
# 并集:推荐使用union方法;用 | 运算符做并集的写法自pandas 1.2起已弃用,2.0起 | 改为逐元素的逻辑运算
In[13]: c1.union(c2)
Out[13]: Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')
In[14]: c1 | c2
Out[14]: Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')
# 对称差集:推荐使用symmetric_difference方法;用 ^ 运算符的写法自pandas 1.2起已弃用,2.0起 ^ 改为逐元素的逻辑运算
In[15]: c1.symmetric_difference(c2)
Out[15]: Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')
In[16]: c1 ^ c2
Out[16]: Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')