1. Python和Pandas日期工具的区别

  1. # 引入datetime模块,创建date、time和datetime对象
  2. In[2]: import datetime
  3. date = datetime.date(year=2013, month=6, day=7)
  4. time = datetime.time(hour=12, minute=30, second=19, microsecond=463198)
  5. dt = datetime.datetime(year=2013, month=6, day=7,
  6. hour=12, minute=30, second=19, microsecond=463198)
  7. print("date is", date)
  8. print("time is", time)
  9. print("datetime is", dt)
  10. date is 2013-06-07
  11. time is 12:30:19.463198
  12. datetime is 2013-06-07 12:30:19.463198
  1. # 创建并打印一个timedelta对象
  2. In[3]: td = datetime.timedelta(weeks=2, days=5, hours=10, minutes=20,
  3. seconds=6.73, milliseconds=99, microseconds=8)
  4. print(td)
  5. 19 days, 10:20:06.829008
  1. # 将date和datetime,与timedelta做加减
  2. In[4]: print('new date is', date + td)
  3. print('new datetime is', dt + td)
  4. new date is 2013-06-26
  5. new datetime is 2013-06-26 22:50:26.292206
  1. # time和timedelta不能做加法
  2. In[5]: time + td
  3. ---------------------------------------------------------------------------
  4. TypeError Traceback (most recent call last)
  5. <ipython-input-5-bd4e11db43bd> in <module>()
  6. ----> 1 time + td
  7. TypeError: unsupported operand type(s) for +: 'datetime.time' and 'datetime.timedelta'
  1. # 再来看一下pandas的Timestamp对象(以下示例假定已执行 import pandas as pd)。Timestamp构造器比较灵活,可以处理多种输入
  2. In[6]: pd.Timestamp(year=2012, month=12, day=21, hour=5, minute=10, second=8, microsecond=99)
  3. Out[6]: Timestamp('2012-12-21 05:10:08.000099')
  4. In[7]: pd.Timestamp('2016/1/10')
  5. Out[7]: Timestamp('2016-01-10 00:00:00')
  6. In[8]: pd.Timestamp('2014-5/10')
  7. Out[8]: Timestamp('2014-05-10 00:00:00')
  8. In[9]: pd.Timestamp('Jan 3, 2019 20:45.56')
  9. Out[9]: Timestamp('2019-01-03 20:45:33')
  10. In[10]: pd.Timestamp('2016-01-05T05:34:43.123456789')
  11. Out[10]: Timestamp('2016-01-05 05:34:43.123456789')
  1. # 也可以传入一个整数或浮点数,表示距离1970年1月1日(Unix纪元)的时间;默认单位是纳秒,可以用unit参数指定其它单位
  2. In[11]: pd.Timestamp(500)
  3. Out[11]: Timestamp('1970-01-01 00:00:00.000000500')
  4. In[12]: pd.Timestamp(5000, unit='D')
  5. Out[12]: Timestamp('1983-09-10 00:00:00')
  1. # pandas的to_datetime函数与Timestamp构造器类似,但多了一些参数(如下面用到的dayfirst、format、origin以及errors)
  2. In[13]: pd.to_datetime('2015-5-13')
  3. Out[13]: Timestamp('2015-05-13 00:00:00')
  4. In[14]: pd.to_datetime('2015-13-5', dayfirst=True)
  5. Out[14]: Timestamp('2015-05-13 00:00:00')
  6. In[15]: pd.Timestamp('Saturday September 30th, 2017')
  7. Out[15]: Timestamp('2017-09-30 00:00:00')
  8. In[16]: pd.to_datetime('Start Date: Sep 30, 2017 Start Time: 1:30 pm', format='Start Date: %b %d, %Y Start Time: %I:%M %p')
  9. Out[16]: Timestamp('2017-09-30 13:30:00')
  10. In[17]: pd.to_datetime(100, unit='D', origin='2013-1-1')
  11. Out[17]: Timestamp('2013-04-11 00:00:00')
  1. # to_datetime可以将由字符串或整数组成的列表或Series整体转换为时间戳:Series转换后的dtype为datetime64[ns],列表则转换为DatetimeIndex
  2. In[18]: s = pd.Series([10, 100, 1000, 10000])
  3. pd.to_datetime(s, unit='D')
  4. Out[18]: 0 1970-01-11
  5. 1 1970-04-11
  6. 2 1972-09-27
  7. 3 1997-05-19
  8. dtype: datetime64[ns]
  9. In[19]: s = pd.Series(['12-5-2015', '14-1-2013', '20/12/2017', '40/23/2017'])
  10. pd.to_datetime(s, dayfirst=True, errors='coerce')
  11. Out[19]: 0 2015-05-12
  12. 1 2013-01-14
  13. 2 2017-12-20
  14. 3 NaT
  15. dtype: datetime64[ns]
  16. In[20]: pd.to_datetime(['Aug 3 1999 3:45:56', '10/31/2017'])
  17. Out[20]: DatetimeIndex(['1999-08-03 03:45:56', '2017-10-31 00:00:00'], dtype='datetime64[ns]', freq=None)
  1. # Pandas的Timedelta和to_timedelta也可以用来表示一定的时间量。
  2. # to_timedelta函数可以产生一个Timedelta对象。
  3. # 与to_datetime类似,to_timedelta也可以转换列表或Series变成Timedelta对象。
  4. In[21]: pd.Timedelta('12 days 5 hours 3 minutes 123456789 nanoseconds')
  5. Out[21]: Timedelta('12 days 05:03:00.123456')
  6. In[22]: pd.Timedelta(days=5, minutes=7.34)
  7. Out[22]: Timedelta('5 days 00:07:20.400000')
  8. In[23]: pd.Timedelta(100, unit='W')
  9. Out[23]: Timedelta('700 days 00:00:00')
  10. In[24]: pd.to_timedelta('5 dayz', errors='ignore')
  11. Out[24]: '5 dayz'
  12. In[25]: pd.to_timedelta('67:15:45.454')
  13. Out[25]: Timedelta('2 days 19:15:45.454000')
  14. In[26]: s = pd.Series([10, 100])
  15. pd.to_timedelta(s, unit='s')
  16. Out[26]: 0 00:00:10
  17. 1 00:01:40
  18. dtype: timedelta64[ns]
  19. In[27]: time_strings = ['2 days 24 minutes 89.67 seconds', '00:45:23.6']
  20. pd.to_timedelta(time_strings)
  21. Out[27]: TimedeltaIndex(['2 days 00:25:29.670000', '0 days 00:45:23.600000'], dtype='timedelta64[ns]', freq=None)
  1. # Timedelta可以乘以标量、彼此相加,也可以与Timestamp相加减;两个Timedelta相除则返回一个浮点数
  2. In[28]: pd.Timedelta('12 days 5 hours 3 minutes') * 2
  3. Out[28]: Timedelta('24 days 10:06:00')
  4. In[29]: pd.Timestamp('1/1/2017') + pd.Timedelta('12 days 5 hours 3 minutes') * 2
  5. Out[29]: Timestamp('2017-01-25 10:06:00')
  6. In[30]: td1 = pd.to_timedelta([10, 100], unit='s')
  7. td2 = pd.to_timedelta(['3 hours', '4 hours'])
  8. td1 + td2
  9. Out[30]: TimedeltaIndex(['03:00:10', '04:01:40'], dtype='timedelta64[ns]', freq=None)
  10. In[31]: pd.Timedelta('12 days') / pd.Timedelta('3 days')
  11. Out[31]: 4.0
  1. # Timestamps 和 Timedeltas有许多可用的属性和方法,下面列举了一些:
  2. In[32]: ts = pd.Timestamp('2016-10-1 4:23:23.9')
  3. In[33]: ts.ceil('h')
  4. Out[33]: Timestamp('2016-10-01 05:00:00')
  5. In[34]: ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second
  6. Out[34]: (2016, 10, 1, 4, 23, 23)
  7. In[35]: ts.dayofweek, ts.dayofyear, ts.daysinmonth
  8. Out[35]: (5, 275, 31)
  9. In[36]: ts.to_pydatetime()
  10. Out[36]: datetime.datetime(2016, 10, 1, 4, 23, 23, 900000)
  11. In[37]: td = pd.Timedelta(125.8723, unit='h')
  12. td
  13. Out[37]: Timedelta('5 days 05:52:20.280000')
  14. In[38]: td.round('min')
  15. Out[38]: Timedelta('5 days 05:52:00')
  16. In[39]: td.components
  17. Out[39]: Components(days=5, hours=5, minutes=52, seconds=20, milliseconds=280, microseconds=0, nanoseconds=0)
  18. In[40]: td.total_seconds()
  19. Out[40]: 453140.28

更多

  1. # 对比一下在指定和不指定format格式字符串这两种情况下,将字符串转换为Timestamp对象的速度(下面的结果中,指定format约快35倍)
  2. In[41]: date_string_list = ['Sep 30 1984'] * 10000
  3. In[42]: %timeit pd.to_datetime(date_string_list, format='%b %d %Y')
  4. 37.8 ms ± 556 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
  5. In[43]: %timeit pd.to_datetime(date_string_list)
  6. 1.33 s ± 57.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)