Time series data analysis

1. Basic operations on time series

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import pandas as pd

# A single date string is parsed into one Timestamp
single_stamp = pd.to_datetime('20180828')
print(single_stamp, end='\n\n')

# A list of date strings is parsed into a DatetimeIndex
date_index = pd.to_datetime(['20180820', '20180828', '20180908'])
print(date_index, end='\n\n')

# Positional access on the index yields an individual Timestamp
first_stamp = date_index[0]
print(first_stamp, end='\n\n')

# Use the parsed dates as the labels of a Series
date_ser = pd.Series(data=[11, 22, 33], index=date_index)
print(date_ser, end='\n\n')

1
2
3
4
5
6
7
8
9
10
11
12
from datetime import datetime

import pandas as pd

# One inner list per row of the frame
data_demo = [
    [11, 22, 33],
    [44, 55, 66],
    [77, 88, 99],
    [12, 23, 34],
]

# Row labels as datetime objects (note: they are not in chronological order)
date_list = [
    datetime(2018, 1, 23),
    datetime(2018, 2, 15),
    datetime(2018, 5, 22),
    datetime(2018, 3, 30),
]

# Passing datetime objects as the index gives the frame a date-based index
time_df = pd.DataFrame(data=data_demo, index=date_list)

print(time_df)

2. Fixed frequency time series

2.1 Create fixed frequency time series

Official documentation: https://pandas.pydata.org/docs/reference/api/pandas.date_range.html

1
2
3
4
import pandas as pd

# Build a DatetimeIndex from just a start date and an end date;
# both endpoints are included and the step defaults to one day
daily_index = pd.date_range(start='2018/08/10', end='2018/08/20')
print(daily_index)

1
2
3
4
import pandas as pd

# Build a DatetimeIndex by counting `periods` days forward from `start`
forward_index = pd.date_range(start='2018/08/10', periods=5)
print(forward_index)

1
2
3
4
import pandas as pd

# Build a DatetimeIndex by counting `periods` days backward from `end`
backward_index = pd.date_range(end='2018/08/10', periods=5)
print(backward_index)

1
2
3
4
5
6
7
8
9
import pandas as pd

# 'W-SUN' is a weekly frequency anchored on Sunday, so the index holds the
# first five Sundays on or after the start date
dates_index = pd.date_range(start='2018-01-01', periods=5, freq='W-SUN')
print(dates_index, end='\n\n')

# Attach one value to each generated Sunday
ser_data = [12, 56, 89, 99, 31]
sunday_ser = pd.Series(data=ser_data, index=dates_index)
print(sunday_ser)

1
2
3
4
5
6
import pandas as pd

# Build a timezone-aware DatetimeIndex: the start timestamp, the number of
# entries and the time zone are given; the frequency is left at its daily
# default
hk_index = pd.date_range(
    start='2018/8/1 12:13:30',
    periods=5,
    tz='Asia/Hong_Kong',
)
print(hk_index)

1
2
3
4
5
6
import pandas as pd

# normalize=True resets the time-of-day of the generated timestamps to
# midnight, so the 12:13:30 in the start string does not appear in the result
midnight_index = pd.date_range(
    start='2018/8/1 12:13:30',
    periods=5,
    normalize=True,
    tz='Asia/Hong_Kong',
)
print(midnight_index)

2.2 Frequency and offset of time series

1
2
3
# Import only the offset classes that are used; a wildcard import would pull
# every public offset class into the namespace and hide where names come from
from pandas.tseries.offsets import Hour, Week

# Offsets can be added together: two weeks plus ten hours
print(Week(2) + Hour(10))

1
2
3
4
5
import pandas as pd
# Import only the offset classes that are used instead of a wildcard import,
# which would flood the namespace with every public offset class
from pandas.tseries.offsets import Hour, Week

# A combined step of two weeks plus ten hours, used as the date_range frequency
date_offset = Week(2) + Hour(10)
print(pd.date_range('2018/3/1', '2018/3/31', freq=date_offset))

2.3 Shifting of time series

1
2
3
4
5
6
7
8
9
10
11
12
13
import numpy as np
import pandas as pd

# Five consecutive days carrying the values 1..5
date_index = pd.date_range(start='2018/01/01', periods=5)
time_ser = pd.Series(data=np.arange(1, 6), index=date_index)
print(time_ser, end='\n\n')

# A positive shift moves the values one row later; the first slot becomes NaN
shifted_forward = time_ser.shift(1)
print(shifted_forward, end='\n\n')

# A negative shift moves the values one row earlier; the last slot becomes NaN
shifted_backward = time_ser.shift(-1)
print(shifted_backward, end='\n\n')

3. Time periods and period arithmetic

3.1 Create a period object

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import pandas as pd

# A Period built from a bare year
year_period = pd.Period(2018)
print(year_period, end='\n\n')

# A year/month string produces a monthly Period
period = pd.Period('2017/6')
print(period, end='\n\n')

# Adding or subtracting an integer moves the period by that many
# units of its own frequency (months here)
later_period = period + 1
print(later_period, end='\n\n')

earlier_period = period - 5
print(earlier_period, end='\n\n')

# Subtracting two periods of the same frequency gives the distance between them
other_period = pd.Period(201201, freq='M')
month_gap = period - other_period
print(month_gap, end='\n\n')

1
2
3
4
5
6
7
8
9
10
11
12
import numpy as np
import pandas as pd

# Monthly periods spanning the months that contain the two endpoint dates
period_index = pd.period_range('2012.1.8', '2012.5.31', freq='M')
print(period_index)
print()

# Build a yearly PeriodIndex from year strings. 'Y' is the annual alias with a
# December year end; the older 'A-DEC' spelling is deprecated since pandas 2.2,
# whereas 'Y' is also accepted by earlier releases.
str_list = ['2010', '2011', '2012']
year_index = pd.PeriodIndex(str_list, freq='Y')
print(year_index)
print()

# A PeriodIndex can label a Series just like a DatetimeIndex
print(pd.Series(np.arange(5), period_index))

3.2 Period frequency conversion

1
2
3
4
5
6
import pandas as pd

# Annual period for 2017 with a December year end. 'Y' replaces the 'A-DEC'
# alias, which is deprecated since pandas 2.2; 'Y' also works on older releases.
period = pd.Period('2017', freq='Y')

# Convert the year to a monthly period, taking its first or its last month
print(period.asfreq('M', how='start'))
print(period.asfreq('M', how='end'))

4. Resampling

4.1 Resample

1
2
3
4
5
6
7
8
9
10
import numpy as np
import pandas as pd

# Thirty consecutive days, each labelled with its running number 0..29
date_index = pd.date_range(start='2017.7.8', periods=30)
time_ser = pd.Series(data=np.arange(len(date_index)), index=date_index)

print(time_ser, end='\n\n')

# Regroup the daily values into weekly bins anchored on Monday and average each bin
weekly_bins = time_ser.resample('W-MON')
print(weekly_bins.mean())

4.2 Downsampling

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import numpy as np
import pandas as pd

# Thirty consecutive days starting on 1 June 2018
date_index = pd.date_range(start='2018/06/01', periods=30)

# Thirty random values in [0, 1) used as the sample data
shares_data = np.random.rand(30)

time_ser = pd.Series(data=shares_data, index=date_index)
print(time_ser, end='\n\n')

# Downsample into 7-day bins and summarise each bin as open/high/low/close
seven_day_bins = time_ser.resample('7D')
print(seven_day_bins.ohlc(), end='\n\n')

# Alternative grouping: average the values whose timestamps share a week number
print(time_ser.groupby(lambda stamp: stamp.week).mean())

4.3 Upsampling

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import numpy as np
import pandas as pd

# Two rows of sample values (stored as strings), one row per week
data_demo = np.array([
    ['101', '210', '150'],
    ['330', '460', '580'],
])

# Two consecutive Sundays serve as the row labels
date_index = pd.date_range(start='2018/06/10', periods=2, freq='W-SUN')
time_df = pd.DataFrame(
    data=data_demo,
    index=date_index,
    columns=['A产品', 'B产品', 'C产品'],
)
print(time_df, end='\n\n')

# Upsample to daily rows; asfreq() leaves the newly created days empty
daily_bins = time_df.resample('D')
print(daily_bins.asfreq(), end='\n\n')

# Upsample to daily rows; ffill() copies the last known row forward
print(time_df.resample('D').ffill())

5. Data statistics - sliding window

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One random value for every day of 2017
date_index = pd.date_range(start='2017-01-01', end='2017-12-31', freq='D')
year_data = np.random.randn(365)
ser = pd.Series(data=year_data, index=date_index)
print(ser.head(), end='\n\n')

# Create a sliding window that spans 10 consecutive observations
roll_window = ser.rolling(window=10)
print(roll_window, end='\n\n')

# Mean over each window; the first 9 entries are NaN because their
# windows are not yet full
ser_window = roll_window.mean()
print(ser_window, end='\n\n')

# Raw series as a dashed yellow line, rolling mean as a solid blue line
plt.plot(ser, 'y--')
plt.plot(ser_window, 'b')

plt.show()


6. Time series model - ARIMA


Time series data analysis
https://www.hardyhu.cn/2022/03/27/Time-series-data-analysis/
Author
John Doe
Posted on
March 27, 2022
Licensed under