9. 使用query方法提高布尔索引的可读性
# Load the employee dataset, then choose the departments to match and the
# columns to display in the filtered result.
In[65]: employee = pd.read_csv('data/employee.csv')
select_columns = ['UNIQUE_ID', 'DEPARTMENT', 'GENDER', 'BASE_SALARY']
depts = [
    'Houston Police Department-HPD',
    'Houston Fire Department (HFD)',
]
# Build the query string (one condition per line) and pass it to query().
# `@depts` refers to the Python list `depts`, not to a column of the frame.
In[66]: qs = (
    "DEPARTMENT in @depts "
    "and GENDER == 'Female' "
    "and 80000 <= BASE_SALARY <= 120000"
)
emp_filtered = employee.query(qs)
# Show only the columns of interest for the first few matching rows.
emp_filtered.loc[:, select_columns].head()
Out[66]:
更多
# Instead of typing the department list by hand, derive it from the data:
# take the 10 most frequent departments, then select the female employees
# who are NOT in any of them.
In[67]: top10_depts = employee['DEPARTMENT'].value_counts().head(10).index.tolist()
qs = "DEPARTMENT not in @top10_depts and GENDER == 'Female'"
employee_filtered2 = employee.query(qs)
employee_filtered2.loc[:, ['DEPARTMENT', 'GENDER']].head()
Out[67]: