Compare commits

...

2 Commits

Author SHA1 Message Date
96bcf6736f add gitignore,envrc 2016-05-12 13:17:44 +00:00
059c4073b9 finish algorithm 2016-05-12 13:16:30 +00:00
4 changed files with 184 additions and 59 deletions

1
.envrc Normal file
View File

@ -0,0 +1 @@
layout python

90
.gitignore vendored Normal file
View File

@ -0,0 +1,90 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
.direnv

View File

@ -1,3 +1,2 @@
pandas
numpy numpy
scipy scipy

151
test.py
View File

@ -1,75 +1,110 @@
#import pandas as pn
import numpy as np
from numpy.fft import fft, ifft
from scipy.sparse.linalg import eigs
import math import math
import numpy as np
from numpy.random import randint
from numpy.linalg import norm
from numpy.fft import fft, ifft
from scipy.sparse.linalg import eigs
from scipy.stats import zscore from scipy.stats import zscore
from scipy.ndimage.interpolation import shift from scipy.ndimage.interpolation import shift
def ncc_c(x,y):
def next_greater_power_of_2(x): """
return 2**(x-1).bit_length() >>> ncc_c([1,2,3,4], [1,2,3,4])
array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667,
0.36666667, 0.13333333])
>>> ncc_c([1,1,1], [1,1,1])
array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333])
>>> ncc_c([1,2,3], [-1,-1,-1])
array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
"""
x_len = len(x)
fft_size = 1<<(2*x_len-1).bit_length()
cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
return np.real(cc) / (norm(x) * norm(y))
def sbd(x, y): def sbd(x, y):
fft_size = next_greater_power_of_2(len(x)) """
cc = np.abs(ifft(fft(x, fft_size) * fft(y, fft_size))) >>> sbd([1,1,1], [1,1,1])
ncc = cc / math.sqrt((sum(x**2) * sum(y**2))) (-2.2204460492503131e-16, array([1, 1, 1]))
>>> sbd([0,1,2], [1,2,3])
(0.043817112532485103, array([1, 2, 3]))
>>> sbd([1,2,3], [0,1,2])
(0.043817112532485103, array([0, 1, 2]))
"""
ncc = ncc_c(x, y)
idx = ncc.argmax() idx = ncc.argmax()
dist = 1 - ncc[idx] dist = 1 - ncc[idx]
return dist, shift(y, idx - len(x)) yshift = shift(y, (idx + 1) - max(len(x), len(y)))
return dist, yshift
def extract_shape(idx, x, j, cur_center):
"""
>>> extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4]))
array([ -1.00000000e+00, -3.06658683e-19, 1.00000000e+00])
>>> extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4]))
array([-0.96836405, 1.02888681, -0.06052275])
"""
_a = []
for i in range(len(idx)):
if idx[i] == j:
if cur_center.sum() == 0:
opt_x = x[i]
else:
_, opt_x = sbd(cur_center, x[i])
_a.append(opt_x)
a = np.array(_a)
if len(a) == 0:
return np.zeros((1, x.shape[1]))
columns = a.shape[1]
y = zscore(a,axis=1,ddof=1)
s = np.dot(y.transpose(), y)
p = np.empty((columns, columns))
p.fill(1.0/columns)
p = np.eye(columns) - p
m = np.dot(np.dot(p, s), p)
_, vec = eigs(m, 1)
centroid = np.real(vec[:,0])
finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum())
finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum())
if finddistance1 >= finddistance2:
centroid *= -1
return zscore(centroid, ddof=1)
def extract_shape(x, c): def kshape(x, k):
n = len(x) """
m = len(x[0]) >>> kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2)
new_x = np.zeros((n, m)) (array([0, 0, 1, 0]), array([[-1.19623139, -0.26273649, 0.26273649, 1.19623139],
for i, row in enumerate(x): [-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]]))
_, x_i = sbd(c, row) """
new_x[i] = x_i m = x.shape[0]
s = np.dot(new_x.transpose(), new_x) idx = randint(0, k, size=m)
q = np.identiy(len(s)) - np.ones(len(s)) * 1 / m centroids = np.zeros((k,x.shape[1]))
M = np.dot(np.dot(q.transpose(), s), q)
_, vec = eigs(M, 1)
return vec[0]
distances = np.empty((m, k))
def k_shape(x, k): for _ in range(100):
iter_ = 0 old_idx = idx
idx = np.zeros(len(x)) # TODO dimension, random init
old_idx = np.zeros(len(x))
c = np.zeros((k,)) # TODO dimension
while idx != old_idx and iter_ < 100:
old_idx = idx.copy()
for j in range(k): for j in range(k):
x_ = [] res = extract_shape(idx, x, j, centroids[j])
for i, x_i in enumerate(x): centroids[j] = res
if idx(i) == j:
x_.append(x_i)
c[j] = extract_shape(x_, c[j])
for i, x_i in enumerate(x):
min_dist = np.inf
for j, c_j in enumerate(c):
dist, _ = sbd(c_j, x_i)
if dist < min_dist:
min_dist = dist
idx[i] = j
for i in range(m):
for j in range(k):
distances[i,j] = 1 - max(ncc_c(x[i], centroids[j]))
idx = distances.argmin(1)
if norm(old_idx - idx) == 0:
break
return idx, centroids
def test_extract_shape():
a = zscore(np.ones((3, 10)))
c = np.arange(10)
extract_shape(a, c)
def test_sbd():
a = np.arange(100)
r1 = sbd(zscore(a), zscore(a))
r2 = sbd(zscore(a), zscore(shift(a, 3)))
r3 = sbd(zscore(a), zscore(shift(a, -3)))
r4 = sbd(zscore(a), zscore(shift(a, 30)))
r5 = sbd(zscore(a), zscore(shift(a, -30)))
if __name__ == "__main__": if __name__ == "__main__":
test_sbd() import doctest
test_extract_shape() doctest.testmod()