README.md: better downsampling code

2016-05-25 10:28:12 +00:00 · 2016-05-25 10:28:12 +00:00 · 7b93a93e5f
commit 7b93a93e5f
parent 5bf6236fab
2 changed files with 6 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -32,9 +32,12 @@ import pandas as pd
 # assuming the time series are stored in a tab seperated file, where `time` is
 # the name of the column containing the timestamp
 df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)
-df = df.fillna(method="bfill", limit=1e9)
-# drop rows with the same time stamp
-df = df.groupby(level=0).first()
+# Use a meaningful sample size depending on the frequency of your time series:
+# a higher sample size is more accurate, but if the series gets too long, the calculation becomes CPU- and memory-intensive.
+# Keeping the length below 2000 values is usually a good idea.
+df = df.resample("500ms").mean()
+df.interpolate(method="time", limit_direction="both", inplace=True)
+df.fillna(method="bfill", inplace=True)
 ```

 - kshape also expect no time series with a constant observation value or 'n/a'
--- a/kshape.py
+++ b/kshape.py
@@ -7,8 +7,6 @@ from numpy.linalg import norm
 from numpy.fft import fft, ifft


-#from scipy.linalg import eigh
-
 def zscore(a, axis=0, ddof=0):
    a = np.asanyarray(a)
    mns = a.mean(axis=axis)