8. 检验股票收益率的正态性
# Load Amazon's stock data, using the Date column as the row index;
# parse_dates asks pandas to parse that column as dates while reading.
In[57]: amzn = pd.read_csv('data/amzn_stock.csv', index_col='Date', parse_dates=['Date'])
# Preview the first few rows.
amzn.head()
Out[57]:
# Select the Close (closing price) column and use pct_change() to compute
# the rate of return from each row to the next. The first row has no
# predecessor, so its value is NaN.
In[58]: amzn_daily_return = amzn.Close.pct_change()
amzn_daily_return.head()
Out[58]: Date
2010-01-04 NaN
2010-01-05 0.005900
2010-01-06 -0.018116
2010-01-07 -0.017013
2010-01-08 0.027077
Name: Close, dtype: float64
# Drop the missing value, then plot a 20-bin histogram to inspect the
# distribution of the returns.
In[59]: amzn_daily_return = amzn_daily_return.dropna()
amzn_daily_return.hist(bins=20)
Out[59]: <matplotlib.axes._subplots.AxesSubplot at 0x1174b3128>
# Compute the mean and standard deviation of the returns.
In[60]: mean = amzn_daily_return.mean()
std = amzn_daily_return.std()
# Compute the absolute z-score of every observation: the z-score is the
# number of standard deviations a value lies away from the mean.
In[61]: abs_z_score = amzn_daily_return.sub(mean).abs().div(std)
# Compute the fraction of returns lying within 1, 2 and 3 standard
# deviations: lt(i) marks the values whose absolute z-score is below i,
# and the mean of that boolean result is the share of True entries.
In[62]: pcts = [abs_z_score.lt(i).mean() for i in range(1,4)]
print('{:.3f} fall within 1 standard deviation. '
'{:.3f} within 2 and {:.3f} within 3'.format(*pcts))
0.787 fall within 1 standard deviation. 0.956 within 2 and 0.985 within 3
更多
# Combine the steps above into a single function.
# In[63]:
def test_return_normality(stock_data):
    """Plot the returns of a stock and report how close they are to normal.

    The function draws a 20-bin histogram of the period-over-period returns
    of the ``Close`` column and prints the fraction of returns whose
    absolute z-score is below 1, 2 and 3. For normally distributed data
    those fractions would be roughly 0.683, 0.954 and 0.997.

    Args:
        stock_data: A table-like object (e.g. a pandas DataFrame) whose
            ``'Close'`` item is a pandas Series of closing prices.

    Returns:
        None. The results are shown as a plot and a printed line.
    """
    close = stock_data['Close']

    # The first return has no previous price and is NaN, so drop it.
    daily_return = close.pct_change().dropna()
    daily_return.hist(bins=20)

    mean = daily_return.mean()
    std = daily_return.std()

    # Absolute z-score: distance from the mean in units of standard deviation.
    abs_z_score = daily_return.sub(mean).abs().div(std)

    # The mean of a boolean Series is the share of True values.
    pcts = [abs_z_score.lt(i).mean() for i in range(1, 4)]
    print('{:.3f} fall within 1 standard deviation. '
          '{:.3f} within 2 and {:.3f} within 3'.format(*pcts))
# Load the stock data in data/slb_stock.csv the same way (Date column as
# the index) and run the normality check on it.
In[64]: slb = pd.read_csv('data/slb_stock.csv',
index_col='Date', parse_dates=['Date'])
test_return_normality(slb)
0.742 fall within 1 standard deviation. 0.946 within 2 and 0.986 within 3