3. 在整个DataFrame上操作
In[18]: pd.options.display.max_rows = 8
movie = pd.read_csv('data/movie.csv')
# 打印行数和列数
movie.shape
Out[18]: (4916, 28)
# 打印元素的总个数(行数 × 列数)
In[19]: movie.size
Out[19]: 137648
# 该数据集的维数
In[20]: movie.ndim
Out[20]: 2
# 该数据集的长度(即行数)
In[21]: len(movie)
Out[21]: 4916
# 各列非缺失值的个数
In[22]: movie.count()
Out[22]: color 4897
director_name 4814
num_critic_for_reviews 4867
duration 4901
...
actor_2_facebook_likes 4903
imdb_score 4916
aspect_ratio 4590
movie_facebook_likes 4916
Length: 28, dtype: int64
# 各数值列的最小值(默认跳过缺失值)
In[23]: movie.min()
Out[23]: num_critic_for_reviews 1.00
duration 7.00
director_facebook_likes 0.00
actor_3_facebook_likes 0.00
...
actor_2_facebook_likes 0.00
imdb_score 1.60
aspect_ratio 1.18
movie_facebook_likes 0.00
Length: 16, dtype: float64
# 打印描述信息
In[24]: movie.describe()
Out[24]:
# 使用percentiles参数指定分位数
In[25]: pd.options.display.max_rows = 10
In[26]: movie.describe(percentiles=[.01, .3, .99])
Out[26]:
# 打印各列空值的个数
In[27]: pd.options.display.max_rows = 8
In[28]: movie.isnull().sum()
Out[28]: color 19
director_name 102
num_critic_for_reviews 49
duration 15
...
actor_2_facebook_likes 13
imdb_score 0
aspect_ratio 326
movie_facebook_likes 0
Length: 28, dtype: int64
更多
# 设定skipna=False后,只有不含缺失值的数值列才会计算出结果,含缺失值的列返回NaN
In[29]: movie.min(skipna=False)
Out[29]: num_critic_for_reviews NaN
duration NaN
director_facebook_likes NaN
actor_3_facebook_likes NaN
...
actor_2_facebook_likes NaN
imdb_score 1.6
aspect_ratio NaN
movie_facebook_likes 0.0
Length: 16, dtype: float64