10. 创建、删除列
# 通过[列名]添加新列
In[96]: movie = pd.read_csv('data/movie.csv')
In[97]: movie['has_seen'] = 0
In[98]: movie.columns
Out[98]: Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
'imdb_score', 'aspect_ratio', 'movie_facebook_likes', 'has_seen'],
dtype='object')
# 将三位演员和导演的Facebook点赞数相加，赋给新列（只要有一项缺失，相加结果就是缺失值）
In[99]: movie['actor_director_facebook_likes'] = (movie['actor_1_facebook_likes'] +
movie['actor_2_facebook_likes'] +
movie['actor_3_facebook_likes'] +
movie['director_facebook_likes'])
In[100]: movie['actor_director_facebook_likes'].isnull().sum()
Out[100]: 122
# 先用fillna(0)把缺失值填为0，再比较两列，最后用all()检查是否所有的布尔值都为True
In[101]: movie['actor_director_facebook_likes'] = movie['actor_director_facebook_likes'].fillna(0)
In[102]: movie['is_cast_likes_more'] = (movie['cast_total_facebook_likes'] >=
movie['actor_director_facebook_likes'])
In[103]: movie['is_cast_likes_more'].all()
Out[103]: False
In[104]: movie = movie.drop('actor_director_facebook_likes', axis='columns')
In[105]: movie['actor_total_facebook_likes'] = (movie['actor_1_facebook_likes'] +
movie['actor_2_facebook_likes'] +
movie['actor_3_facebook_likes'])
movie['actor_total_facebook_likes'] = movie['actor_total_facebook_likes'].fillna(0)
In[106]: movie['is_cast_likes_more'] = movie['cast_total_facebook_likes'] >= \
movie['actor_total_facebook_likes']
movie['is_cast_likes_more'].all()
Out[106]: True
In[107]: movie['pct_actor_cast_like'] = (movie['actor_total_facebook_likes'] /
movie['cast_total_facebook_likes'])
In[108]: movie['pct_actor_cast_like'].min(), movie['pct_actor_cast_like'].max()
Out[108]: (0.0, 1.0)
In[109]: movie.set_index('movie_title')['pct_actor_cast_like'].head()
Out[109]: movie_title
Avatar 0.577369
Pirates of the Caribbean: At World's End 0.951396
Spectre 0.987521
The Dark Knight Rises 0.683783
Star Wars: Episode VII - The Force Awakens 0.000000
Name: pct_actor_cast_like, dtype: float64
更多
# 用insert()方法原地插入列：先用get_loc()取得gross列的整数位置（从0开始计数为8），加1得到9，使profit列紧跟在gross列之后
In[110]: profit_index = movie.columns.get_loc('gross') + 1
profit_index
In[111]: movie.insert(loc=profit_index,
column='profit',
value=movie['gross'] - movie['budget'])
In[112]: movie.head()
Out[112]: