3. 用布尔索引过滤
# 读取movie数据集，创建第一组布尔条件：评分高于8、分级为PG-13、上映年份早于2000年或晚于2009年
In[15]: movie = pd.read_csv('data/movie.csv', index_col='movie_title')
crit_a1 = movie.imdb_score > 8
crit_a2 = movie.content_rating == 'PG-13'
crit_a3 = (movie.title_year < 2000) | (movie.title_year > 2009)
final_crit_a = crit_a1 & crit_a2 & crit_a3
# 创建第二组布尔条件：评分低于5、分级为R、上映年份在2000年到2010年之间（含两端）
In[16]: crit_b1 = movie.imdb_score < 5
crit_b2 = movie.content_rating == 'R'
crit_b3 = (movie.title_year >= 2000) & (movie.title_year <= 2010)
final_crit_b = crit_b1 & crit_b2 & crit_b3
# 将这两个条件用或运算合并起来
In[17]: final_crit_all = final_crit_a | final_crit_b
final_crit_all.head()
Out[17]: movie_title
Avatar False
Pirates of the Caribbean: At World's End False
Spectre False
The Dark Knight Rises True
Star Wars: Episode VII - The Force Awakens False
dtype: bool
# 用最终的布尔条件过滤数据
In[18]: movie[final_crit_all].head()
Out[18]:
# 使用loc同时按布尔条件筛选行、按列名选取列；只保留参与判断的三列，便于直接核对过滤结果是否符合条件
In[19]: cols = ['imdb_score', 'content_rating', 'title_year']
movie_filtered = movie.loc[final_crit_all, cols]
movie_filtered.head(10)
Out[19]:
更多
# 用一个长布尔表达式代替前面由短表达式生成的布尔条件
# 注意：每个比较表达式都必须用括号括起来，因为 & 和 | 的运算优先级高于比较运算符
In[21]: final_crit_a2 = (movie.imdb_score > 8) & \
(movie.content_rating == 'PG-13') & \
((movie.title_year < 2000) | (movie.title_year > 2009))
final_crit_a2.equals(final_crit_a)
Out[21]: True