Compare commits

..

No commits in common. "96bcf6736fd9329d50b4a92c91fba669f10a136b" and "8bd610b72f2edb0d8ffca7092c48dcec92a7bcc9" have entirely different histories.

4 changed files with 57 additions and 182 deletions

1
.envrc
View File

@ -1 +0,0 @@
layout python

90
.gitignore vendored
View File

@ -1,90 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
.direnv

View File

@ -1,2 +1,3 @@
pandas
numpy numpy
scipy scipy

147
test.py
View File

@ -1,110 +1,75 @@
import math #import pandas as pn
import numpy as np import numpy as np
from numpy.random import randint
from numpy.linalg import norm
from numpy.fft import fft, ifft from numpy.fft import fft, ifft
from scipy.sparse.linalg import eigs from scipy.sparse.linalg import eigs
import math
from scipy.stats import zscore from scipy.stats import zscore
from scipy.ndimage.interpolation import shift from scipy.ndimage.interpolation import shift
def ncc_c(x,y):
""" def next_greater_power_of_2(x):
>>> ncc_c([1,2,3,4], [1,2,3,4]) return 2**(x-1).bit_length()
array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667,
0.36666667, 0.13333333])
>>> ncc_c([1,1,1], [1,1,1])
array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333])
>>> ncc_c([1,2,3], [-1,-1,-1])
array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
"""
x_len = len(x)
fft_size = 1<<(2*x_len-1).bit_length()
cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
return np.real(cc) / (norm(x) * norm(y))
def sbd(x, y): def sbd(x, y):
""" fft_size = next_greater_power_of_2(len(x))
>>> sbd([1,1,1], [1,1,1]) cc = np.abs(ifft(fft(x, fft_size) * fft(y, fft_size)))
(-2.2204460492503131e-16, array([1, 1, 1])) ncc = cc / math.sqrt((sum(x**2) * sum(y**2)))
>>> sbd([0,1,2], [1,2,3])
(0.043817112532485103, array([1, 2, 3]))
>>> sbd([1,2,3], [0,1,2])
(0.043817112532485103, array([0, 1, 2]))
"""
ncc = ncc_c(x, y)
idx = ncc.argmax() idx = ncc.argmax()
dist = 1 - ncc[idx] dist = 1 - ncc[idx]
yshift = shift(y, (idx + 1) - max(len(x), len(y))) return dist, shift(y, idx - len(x))
return dist, yshift
def extract_shape(idx, x, j, cur_center):
"""
>>> extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4]))
array([ -1.00000000e+00, -3.06658683e-19, 1.00000000e+00])
>>> extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4]))
array([-0.96836405, 1.02888681, -0.06052275])
"""
_a = []
for i in range(len(idx)):
if idx[i] == j:
if cur_center.sum() == 0:
opt_x = x[i]
else:
_, opt_x = sbd(cur_center, x[i])
_a.append(opt_x)
a = np.array(_a)
if len(a) == 0:
return np.zeros((1, x.shape[1]))
columns = a.shape[1]
y = zscore(a,axis=1,ddof=1)
s = np.dot(y.transpose(), y)
p = np.empty((columns, columns))
p.fill(1.0/columns)
p = np.eye(columns) - p
m = np.dot(np.dot(p, s), p)
_, vec = eigs(m, 1)
centroid = np.real(vec[:,0])
finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum())
finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum())
if finddistance1 >= finddistance2:
centroid *= -1
return zscore(centroid, ddof=1)
def kshape(x, k): def extract_shape(x, c):
""" n = len(x)
>>> kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2) m = len(x[0])
(array([0, 0, 1, 0]), array([[-1.19623139, -0.26273649, 0.26273649, 1.19623139], new_x = np.zeros((n, m))
[-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]])) for i, row in enumerate(x):
""" _, x_i = sbd(c, row)
m = x.shape[0] new_x[i] = x_i
idx = randint(0, k, size=m) s = np.dot(new_x.transpose(), new_x)
centroids = np.zeros((k,x.shape[1])) q = np.identiy(len(s)) - np.ones(len(s)) * 1 / m
M = np.dot(np.dot(q.transpose(), s), q)
_, vec = eigs(M, 1)
return vec[0]
distances = np.empty((m, k))
for _ in range(100): def k_shape(x, k):
old_idx = idx iter_ = 0
idx = np.zeros(len(x)) # TODO dimension, random init
old_idx = np.zeros(len(x))
c = np.zeros((k,)) # TODO dimension
while idx != old_idx and iter_ < 100:
old_idx = idx.copy()
for j in range(k): for j in range(k):
res = extract_shape(idx, x, j, centroids[j]) x_ = []
centroids[j] = res for i, x_i in enumerate(x):
if idx(i) == j:
x_.append(x_i)
c[j] = extract_shape(x_, c[j])
for i, x_i in enumerate(x):
min_dist = np.inf
for j, c_j in enumerate(c):
dist, _ = sbd(c_j, x_i)
if dist < min_dist:
min_dist = dist
idx[i] = j
for i in range(m):
for j in range(k):
distances[i,j] = 1 - max(ncc_c(x[i], centroids[j]))
idx = distances.argmin(1)
if norm(old_idx - idx) == 0:
break
return idx, centroids
def test_extract_shape():
a = zscore(np.ones((3, 10)))
c = np.arange(10)
extract_shape(a, c)
def test_sbd():
a = np.arange(100)
r1 = sbd(zscore(a), zscore(a))
r2 = sbd(zscore(a), zscore(shift(a, 3)))
r3 = sbd(zscore(a), zscore(shift(a, -3)))
r4 = sbd(zscore(a), zscore(shift(a, 30)))
r5 = sbd(zscore(a), zscore(shift(a, -30)))
if __name__ == "__main__": if __name__ == "__main__":
import doctest test_sbd()
doctest.testmod() test_extract_shape()