diff --git a/kshape.py b/kshape.py index 307872c..b72c2a3 100644 --- a/kshape.py +++ b/kshape.py @@ -1,22 +1,72 @@ import math import numpy as np -from numpy.random import randint +from numpy.random import randint, seed +from numpy.linalg import norm, eigh from numpy.linalg import norm from numpy.fft import fft, ifft -from scipy.sparse.linalg import eigs -from scipy.stats import zscore -from scipy.ndimage.interpolation import shift + +#from scipy.linalg import eigh + +def zscore(a, axis=0, ddof=0): + a = np.asanyarray(a) + mns = a.mean(axis=axis) + sstd = a.std(axis=axis, ddof=ddof) + if axis and mns.ndim < a.ndim: + return ((a - np.expand_dims(mns, axis=axis)) / + np.expand_dims(sstd,axis=axis)) + else: + return (a - mns) / sstd + +def roll_zeropad(a, shift, axis=None): + a = np.asanyarray(a) + if shift == 0: return a + if axis is None: + n = a.size + reshape = True + else: + n = a.shape[axis] + reshape = False + if np.abs(shift) > n: + res = np.zeros_like(a) + elif shift < 0: + shift += n + zeros = np.zeros_like(a.take(np.arange(n-shift), axis)) + res = np.concatenate((a.take(np.arange(n-shift,n), axis), zeros), axis) + else: + zeros = np.zeros_like(a.take(np.arange(n-shift,n), axis)) + res = np.concatenate((zeros, a.take(np.arange(n-shift), axis)), axis) + if reshape: + return res.reshape(a.shape) + else: + return res + +# TODO vectorized version of _ncc_c +#def _ncc_c(x,y): +# """ +# >>> _ncc_c(np.array([[1,2,3,4]]), np.array([[1,2,3,4]])) +# array([[ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667, +# 0.36666667, 0.13333333]]) +# >>> _ncc_c(np.array([[1,1,1]]), np.array([[1,1,1]])) +# array([[ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]]) +# >>> _ncc_c(np.array([[1,2,3]]), np.array([[-1,-1,-1]])) +# array([[-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]]) +# """ +# x_len = x.shape[1] +# fft_size = 1<<(2*x_len-1).bit_length() +# cc = ifftn(fftn(x, (fft_size,)) * np.conj(fftn(y, (fft_size,)))) +# cc = np.concatenate((cc[:, -(x_len-1):], cc[:, :x_len]), axis=1) +# return np.real(cc) / (norm(x) * norm(y)) def _ncc_c(x,y): """ - >>> ncc_c([1,2,3,4], [1,2,3,4]) + >>> _ncc_c([1,2,3,4], [1,2,3,4]) array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667, 0.36666667, 0.13333333]) - >>> ncc_c([1,1,1], [1,1,1]) + >>> _ncc_c([1,1,1], [1,1,1]) array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]) - >>> ncc_c([1,2,3], [-1,-1,-1]) + >>> _ncc_c([1,2,3], [-1,-1,-1]) array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]) """ x_len = len(x) @@ -28,25 +78,32 @@ def _ncc_c(x,y): def _sbd(x, y): """ - >>> sbd([1,1,1], [1,1,1]) + >>> _sbd([1,1,1], [1,1,1]) (-2.2204460492503131e-16, array([1, 1, 1])) - >>> sbd([0,1,2], [1,2,3]) + >>> _sbd([0,1,2], [1,2,3]) (0.043817112532485103, array([1, 2, 3])) - >>> sbd([1,2,3], [0,1,2]) + >>> _sbd([1,2,3], [0,1,2]) (0.043817112532485103, array([0, 1, 2])) """ ncc = _ncc_c(x, y) idx = ncc.argmax() dist = 1 - ncc[idx] - yshift = shift(y, (idx + 1) - max(len(x), len(y))) + yshift = roll_zeropad(y, (idx + 1) - max(len(x), len(y))) + return dist, yshift + +#@profile def _extract_shape(idx, x, j, cur_center): """ - >>> extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4])) - array([ -1.00000000e+00, -3.06658683e-19, 1.00000000e+00]) - >>> extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4])) + >>> _extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4])) + array([-1., 0., 1.]) + >>> _extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4])) array([-0.96836405, 1.02888681, -0.06052275]) + >>> _extract_shape(np.array([1,0,1,0]), np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 0, np.array([0,0,0,0])) + array([-1.2089303 , -0.19618238, 0.19618238, 1.2089303 ]) + >>> _extract_shape(np.array([0,0,1,0]), np.array([[1,2,3,4],[0,1,2,3],[-1,1,-1,1],[1,2,2,3]]), 0, np.array([-1.2089303,-0.19618238,0.19618238,1.2089303])) + array([-1.19623139, -0.26273649, 0.26273649, 1.19623139]) """ _a = [] for i in range(len(idx)): @@ -69,39 +126,40 @@ def _extract_shape(idx, x, j, cur_center): p = np.eye(columns) - p m = np.dot(np.dot(p, s), p) - _, vec = eigs(m, 1) - centroid = np.real(vec[:,0]) + _, vec = eigh(m) + centroid = vec[:,-1] finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum()) finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum()) if finddistance1 >= finddistance2: centroid *= -1 - return zscore(centroid, ddof=1) + return zscore(centroid, ddof=1) def _kshape(x, k): """ - >>> kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2) - (array([0, 0, 1, 0]), array([[-1.19623139, -0.26273649, 0.26273649, 1.19623139], + >>> from numpy.random import seed; seed(0) + >>> _kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2) + (array([0, 0, 1, 0]), array([[-1.2244258 , -0.35015476, 0.52411628, 1.05046429], [-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]])) """ m = x.shape[0] idx = randint(0, k, size=m) centroids = np.zeros((k,x.shape[1])) - distances = np.empty((m, k)) + for _ in range(100): old_idx = idx for j in range(k): - res = _extract_shape(idx, x, j, centroids[j]) - centroids[j] = res + centroids[j] = _extract_shape(idx, x, j, centroids[j]) for i in range(m): - for j in range(k): - distances[i,j] = 1 - max(_ncc_c(x[i], centroids[j])) + for j in range(k): + distances[i,j] = 1 - max(_ncc_c(x[i], centroids[j])) idx = distances.argmin(1) - if norm(old_idx - idx) == 0: + if np.array_equal(old_idx, idx): break + return idx, centroids def kshape(x, k):