7. 串联(链式调用)Series方法
# 用value_counts()统计每位导演出现的次数,再用head(3)查看出现次数最多的前三位
In[76]: movie = pd.read_csv('data/movie.csv')
actor_1_fb_likes = movie['actor_1_facebook_likes']
director = movie['director_name']
In[77]: director.value_counts().head(3)
Out[77]: Steven Spielberg 26
Woody Allen 22
Clint Eastwood 20
Name: director_name, dtype: int64
# 统计缺失值的数量
In[78]: actor_1_fb_likes.isnull().sum()
Out[78]: 7
# 查看actor_1_fb_likes的数据类型;该列含有缺失值(NaN),因此是float64而不是整型
In[79]: actor_1_fb_likes.dtype
Out[79]: dtype('float64')
# 依次将缺失值填充为0、转换为整型、查看前五行
In[80]: actor_1_fb_likes.fillna(0)\
.astype(int)\
.head()
Out[80]: 0 1000
1 40000
2 11000
3 27000
4 131
Name: actor_1_facebook_likes, dtype: int64
更多内容
# 缺失值的比例:isnull()返回布尔值,求均值时True按1、False按0计算,所以均值就是缺失值所占的比例
In[81]: actor_1_fb_likes.isnull().mean()
Out[81]: 0.0014239218877135883
# 用圆括号包裹整条方法链,这样可以分多行书写,而不必在每行末尾加反斜杠
In[82]: (actor_1_fb_likes.fillna(0)
.astype(int)
.head())
Out[82]: 0 1000
1 40000
2 11000
3 27000
4 131
Name: actor_1_facebook_likes, dtype: int64