1. Python和Pandas日期工具的区别
# 引入datetime模块,创建date、time和datetime对象
In[2]: import datetime
date = datetime.date(year=2013, month=6, day=7)
time = datetime.time(hour=12, minute=30, second=19, microsecond=463198)
dt = datetime.datetime(year=2013, month=6, day=7,
hour=12, minute=30, second=19, microsecond=463198)
print("date is ", date)
print("time is", time)
print("datetime is", dt)
date is 2013-06-07
time is 12:30:19.463198
datetime is 2013-06-07 12:30:19.463198
# 创建并打印一个timedelta对象
In[3]: td = datetime.timedelta(weeks=2, days=5, hours=10, minutes=20,
seconds=6.73, milliseconds=99, microseconds=8)
print(td)
19 days, 10:20:06.829008
# 将date和datetime,与timedelta做加减
In[4]: print('new date is', date + td)
print('new datetime is', dt + td)
new date is 2013-06-26
new datetime is 2013-06-26 22:50:26.292206
# time和timedelta不能做加法
In[5]: time + td
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-bd4e11db43bd> in <module>()
----> 1 time + td
TypeError: unsupported operand type(s) for +: 'datetime.time' and 'datetime.timedelta'
# 再来看一下pandas的Timestamp对象。Timestamp构造器比较灵活,可以处理多种输入
In[6]: pd.Timestamp(year=2012, month=12, day=21, hour=5, minute=10, second=8, microsecond=99)
Out[6]: Timestamp('2012-12-21 05:10:08.000099')
In[7]: pd.Timestamp('2016/1/10')
Out[7]: Timestamp('2016-01-10 00:00:00')
In[8]: pd.Timestamp('2014-5/10')
Out[8]: Timestamp('2014-05-10 00:00:00')
In[9]: pd.Timestamp('Jan 3, 2019 20:45.56')
Out[9]: Timestamp('2019-01-03 20:45:33')
In[10]: pd.Timestamp('2016-01-05T05:34:43.123456789')
Out[10]: Timestamp('2016-01-05 05:34:43.123456789')
# 也可以传入一个整数或浮点数,表示距离1970年1月1日(Unix纪元)的时间量;默认单位是纳秒,可以用unit参数指定其它单位
In[11]: pd.Timestamp(500)
Out[11]: Timestamp('1970-01-01 00:00:00.000000500')
In[12]: pd.Timestamp(5000, unit='D')
Out[12]: Timestamp('1983-09-10 00:00:00')
# pandas的to_datetime函数与Timestamp构造器类似,但还支持dayfirst、format、origin等额外参数
In[13]: pd.to_datetime('2015-5-13')
Out[13]: Timestamp('2015-05-13 00:00:00')
In[14]: pd.to_datetime('2015-13-5', dayfirst=True)
Out[14]: Timestamp('2015-05-13 00:00:00')
In[15]: pd.Timestamp('Saturday September 30th, 2017')
Out[15]: Timestamp('2017-09-30 00:00:00')
In[16]: pd.to_datetime('Start Date: Sep 30, 2017 Start Time: 1:30 pm', format='Start Date: %b %d, %Y Start Time: %I:%M %p')
Out[16]: Timestamp('2017-09-30 13:30:00')
In[17]: pd.to_datetime(100, unit='D', origin='2013-1-1')
Out[17]: Timestamp('2013-04-11 00:00:00')
# to_datetime还可以把由字符串或整数组成的整个列表或Series一次性转换为时间戳;设置errors='coerce'时,无法解析的值会变成NaT
In[18]: s = pd.Series([10, 100, 1000, 10000])
pd.to_datetime(s, unit='D')
Out[18]: 0 1970-01-11
1 1970-04-11
2 1972-09-27
3 1997-05-19
dtype: datetime64[ns]
In[19]: s = pd.Series(['12-5-2015', '14-1-2013', '20/12/2017', '40/23/2017'])
pd.to_datetime(s, dayfirst=True, errors='coerce')
Out[19]: 0 2015-05-12
1 2013-01-14
2 2017-12-20
3 NaT
dtype: datetime64[ns]
In[20]: pd.to_datetime(['Aug 3 1999 3:45:56', '10/31/2017'])
Out[20]: DatetimeIndex(['1999-08-03 03:45:56', '2017-10-31 00:00:00'], dtype='datetime64[ns]', freq=None)
# Pandas的Timedelta构造器和to_timedelta函数用来表示一段时间量。
# 两者都可以产生单个Timedelta对象。
# 与to_datetime类似,to_timedelta还可以转换整个列表或Series:列表得到TimedeltaIndex,Series得到timedelta64[ns]类型的Series。
In[21]: pd.Timedelta('12 days 5 hours 3 minutes 123456789 nanoseconds')
Out[21]: Timedelta('12 days 05:03:00.123456')
In[22]: pd.Timedelta(days=5, minutes=7.34)
Out[22]: Timedelta('5 days 00:07:20.400000')
In[23]: pd.Timedelta(100, unit='W')
Out[23]: Timedelta('700 days 00:00:00')
In[24]: pd.to_timedelta('5 dayz', errors='ignore')
Out[24]: '5 dayz'
In[25]: pd.to_timedelta('67:15:45.454')
Out[25]: Timedelta('2 days 19:15:45.454000')
In[26]: s = pd.Series([10, 100])
pd.to_timedelta(s, unit='s')
Out[26]: 0 00:00:10
1 00:01:40
dtype: timedelta64[ns]
In[27]: time_strings = ['2 days 24 minutes 89.67 seconds', '00:45:23.6']
pd.to_timedelta(time_strings)
Out[27]: TimedeltaIndex(['2 days 00:25:29.670000', '0 days 00:45:23.600000'], dtype='timedelta64[ns]', freq=None)
# Timedelta对象可以与数字相乘,可以与Timestamp相加减,也可以彼此相加减;两个Timedelta相除会返回一个浮点数
In[28]: pd.Timedelta('12 days 5 hours 3 minutes') * 2
Out[28]: Timedelta('24 days 10:06:00')
In[29]: pd.Timestamp('1/1/2017') + pd.Timedelta('12 days 5 hours 3 minutes') * 2
Out[29]: Timestamp('2017-01-25 10:06:00')
In[30]: td1 = pd.to_timedelta([10, 100], unit='s')
td2 = pd.to_timedelta(['3 hours', '4 hours'])
td1 + td2
Out[30]: TimedeltaIndex(['03:00:10', '04:01:40'], dtype='timedelta64[ns]', freq=None)
In[31]: pd.Timedelta('12 days') / pd.Timedelta('3 days')
Out[31]: 4.0
# Timestamps 和 Timedeltas有许多可用的属性和方法,下面列举了一些:
In[32]: ts = pd.Timestamp('2016-10-1 4:23:23.9')
In[33]: ts.ceil('h')
Out[33]: Timestamp('2016-10-01 05:00:00')
In[34]: ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second
Out[34]: (2016, 10, 1, 4, 23, 23)
In[35]: ts.dayofweek, ts.dayofyear, ts.daysinmonth
Out[35]: (5, 275, 31)
In[36]: ts.to_pydatetime()
Out[36]: datetime.datetime(2016, 10, 1, 4, 23, 23, 900000)
In[37]: td = pd.Timedelta(125.8723, unit='h')
td
Out[37]: Timedelta('5 days 05:52:20.280000')
In[38]: td.round('min')
Out[38]: Timedelta('5 days 05:52:00')
In[39]: td.components
Out[39]: Components(days=5, hours=5, minutes=52, seconds=20, milliseconds=280, microseconds=0, nanoseconds=0)
In[40]: td.total_seconds()
Out[40]: 453140.28
更多
# 对比在指定和不指定format参数的情况下,将字符串转换为Timestamp的速度:从下面的结果看,指定format后快了约35倍
In[41]: date_string_list = ['Sep 30 1984'] * 10000
In[42]: %timeit pd.to_datetime(date_string_list, format='%b %d %Y')
37.8 ms ± 556 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In[43]: %timeit pd.to_datetime(date_string_list)
1.33 s ± 57.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)