5. 在DataFrame上使用运算符
# college数据集的值既有数值也有对象,整数5不能与字符串相加
In[37]: college = pd.read_csv('data/college.csv')
college + 5
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in na_op(x, y)
1175 result = expressions.evaluate(op, str_rep, x, y,
-> 1176 raise_on_error=True, **eval_kwargs)
1177 except TypeError:
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
210 return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
--> 211 **eval_kwargs)
212 return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b, raise_on_error, truediv, reversed, **eval_kwargs)
121 if result is None:
--> 122 result = _evaluate_standard(op, op_str, a, b, raise_on_error)
123
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
63 with np.errstate(all='ignore'):
---> 64 return op(a, b)
65
TypeError: must be str, not int
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
1183 with np.errstate(all='ignore'):
-> 1184 result = get_result(other)
1185
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in get_result(other)
1152 else:
-> 1153 result = func(values, other)
1154
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in na_op(x, y)
1201 with np.errstate(all='ignore'):
-> 1202 result[mask] = op(xrav, y)
1203 else:
TypeError: must be str, not int
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-37-4749f68a2501> in <module>()
1 college = pd.read_csv('data/college.csv')
----> 2 college + 5
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other, axis, level, fill_value)
1239 self = self.fillna(fill_value)
1240
-> 1241 return self._combine_const(other, na_op)
1242
1243 f.__name__ = name
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _combine_const(self, other, func, raise_on_error)
3541 def _combine_const(self, other, func, raise_on_error=True):
3542 new_data = self._data.eval(func=func, other=other,
-> 3543 raise_on_error=raise_on_error)
3544 return self._constructor(new_data)
3545
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, **kwargs)
3195
3196 def eval(self, **kwargs):
-> 3197 return self.apply('eval', **kwargs)
3198
3199 def quantile(self, **kwargs):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3089
3090 kwargs['mgr'] = self
-> 3091 applied = getattr(b, f)(**kwargs)
3092 result_blocks = _extend_blocks(applied, result_blocks)
3093
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
1189 raise
1190 except Exception as detail:
-> 1191 result = handle_error()
1192
1193 # technically a broadcast error in numpy can 'work' by returning a
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in handle_error()
1172 # The 'detail' variable is defined in outer scope.
1173 raise TypeError('Could not operate %s with block values %s' %
-> 1174 (repr(other), str(detail))) # noqa
1175 else:
1176 # return the values
TypeError: Could not operate 5 with block values must be str, not int
# 将INSTNM列设为行索引,再用filter选出列名中包含UGDS_的列(本科生各种族的比例)
In[38]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
college_ugds_ = college.filter(like='UGDS_')
In[39]: college == 'asdf' # 原书Jupyter notebook中的示例:把college与字符串'asdf'作比较会抛出TypeError,没有实际意义,可以忽略
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-39-697c8af60bcf> in <module>()
----> 1 college == 'asdf'
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other)
1302 # straight boolean comparisions we want to allow all columns
1303 # (regardless of dtype to pass thru) See # 4537 for discussion.
-> 1304 res = self._combine_const(other, func, raise_on_error=False)
1305 return res.fillna(True).astype(bool)
1306
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _combine_const(self, other, func, raise_on_error)
3541 def _combine_const(self, other, func, raise_on_error=True):
3542 new_data = self._data.eval(func=func, other=other,
-> 3543 raise_on_error=raise_on_error)
3544 return self._constructor(new_data)
3545
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, **kwargs)
3195
3196 def eval(self, **kwargs):
-> 3197 return self.apply('eval', **kwargs)
3198
3199 def quantile(self, **kwargs):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3089
3090 kwargs['mgr'] = self
-> 3091 applied = getattr(b, f)(**kwargs)
3092 result_blocks = _extend_blocks(applied, result_blocks)
3093
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
1203
1204 raise TypeError('Could not compare [%s] with block values' %
-> 1205 repr(other))
1206
1207 # transpose if needed
TypeError: Could not compare ['asdf'] with block values
# 查看前5行
In[40]: college_ugds_.head()
Out[40]:
# 现在所有列都是数值类型(同质数据),可以直接进行数值运算
In[41]: college_ugds_.head() + .00501
Out[41]:
# 用整除(地板除,//)得到整数形式的百分比
In[42]: (college_ugds_.head() + .00501) // .01
Out[42]:
# 再除以100
In[43]: college_ugds_op_round = (college_ugds_ + .00501) // .01 / 100
college_ugds_op_round.head()
Out[43]:
# 用round方法保留两位小数;先加.00001是为了抵消浮点数表示误差,使结果与上面的运算符写法一致
In[44]: college_ugds_round = (college_ugds_ + .00001).round(2)
college_ugds_round.head()
Out[44]:
# 浮点数运算有精度误差:.045 + .005并不恰好等于.05,所以前面加的是.00501而不是.005
In[45]: .045 + .005
Out[45]: 0.049999999999999996
In[46]: college_ugds_op_round.equals(college_ugds_round)
Out[46]: True
更多
# DataFrame也有与运算符等价的方法(add、floordiv、div),可以完成同样的计算
In[47]: college_ugds_op_round_methods = college_ugds_.add(.00501).floordiv(.01).div(100)