2016-05-25 10:28:12 +00:00

#### 177 lines 5.6 KiB Python Raw Permalink Blame History

 ```import math ``` ```import numpy as np ``` ``` ``` ```from numpy.random import randint, seed ``` ```from numpy.linalg import norm, eigh ``` ```from numpy.linalg import norm ``` ```from numpy.fft import fft, ifft ``` ``` ``` ``` ``` ```def zscore(a, axis=0, ddof=0): ``` ``` a = np.asanyarray(a) ``` ``` mns = a.mean(axis=axis) ``` ``` sstd = a.std(axis=axis, ddof=ddof) ``` ``` if axis and mns.ndim < a.ndim: ``` ``` return ((a - np.expand_dims(mns, axis=axis)) / ``` ``` np.expand_dims(sstd,axis=axis)) ``` ``` else: ``` ``` return (a - mns) / sstd ``` ``` ``` ```def roll_zeropad(a, shift, axis=None): ``` ``` a = np.asanyarray(a) ``` ``` if shift == 0: return a ``` ``` if axis is None: ``` ``` n = a.size ``` ``` reshape = True ``` ``` else: ``` ``` n = a.shape[axis] ``` ``` reshape = False ``` ``` if np.abs(shift) > n: ``` ``` res = np.zeros_like(a) ``` ``` elif shift < 0: ``` ``` shift += n ``` ``` zeros = np.zeros_like(a.take(np.arange(n-shift), axis)) ``` ``` res = np.concatenate((a.take(np.arange(n-shift,n), axis), zeros), axis) ``` ``` else: ``` ``` zeros = np.zeros_like(a.take(np.arange(n-shift,n), axis)) ``` ``` res = np.concatenate((zeros, a.take(np.arange(n-shift), axis)), axis) ``` ``` if reshape: ``` ``` return res.reshape(a.shape) ``` ``` else: ``` ``` return res ``` ``` ``` ```# TODO vectorized version of _ncc_c ``` ```#def _ncc_c(x,y): ``` ```# """ ``` ```# >>> _ncc_c(np.array([[1,2,3,4]]), np.array([[1,2,3,4]])) ``` ```# array([[ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667, ``` ```# 0.36666667, 0.13333333]]) ``` ```# >>> _ncc_c(np.array([[1,1,1]]), np.array([[1,1,1]])) ``` ```# array([[ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]]) ``` ```# >>> _ncc_c(np.array([[1,2,3]]), np.array([[-1,-1,-1]])) ``` ```# array([[-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]]) ``` ```# """ ``` ```# x_len = x.shape[1] ``` ```# fft_size = 1<<(2*x_len-1).bit_length() ``` ```# cc = ifftn(fftn(x, (fft_size,)) * np.conj(fftn(y, (fft_size,)))) ``` ```# cc = np.concatenate((cc[:, -(x_len-1):], cc[:, :x_len]), axis=1) ``` ```# return np.real(cc) / (norm(x) * norm(y)) ``` ``` ``` ```def _ncc_c(x,y): ``` ``` """ ``` ``` >>> _ncc_c([1,2,3,4], [1,2,3,4]) ``` ``` array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667, ``` ``` 0.36666667, 0.13333333]) ``` ``` >>> _ncc_c([1,1,1], [1,1,1]) ``` ``` array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]) ``` ``` >>> _ncc_c([1,2,3], [-1,-1,-1]) ``` ``` array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]) ``` ``` """ ``` ``` x_len = len(x) ``` ``` fft_size = 1<<(2*x_len-1).bit_length() ``` ``` cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size))) ``` ``` cc = np.concatenate((cc[-(x_len-1):], cc[:x_len])) ``` ``` return np.real(cc) / (norm(x) * norm(y)) ``` ``` ``` ``` ``` ```def _sbd(x, y): ``` ``` """ ``` ``` >>> _sbd([1,1,1], [1,1,1]) ``` ``` (-2.2204460492503131e-16, array([1, 1, 1])) ``` ``` >>> _sbd([0,1,2], [1,2,3]) ``` ``` (0.043817112532485103, array([1, 2, 3])) ``` ``` >>> _sbd([1,2,3], [0,1,2]) ``` ``` (0.043817112532485103, array([0, 1, 2])) ``` ``` """ ``` ``` ncc = _ncc_c(x, y) ``` ``` idx = ncc.argmax() ``` ``` dist = 1 - ncc[idx] ``` ``` yshift = roll_zeropad(y, (idx + 1) - max(len(x), len(y))) ``` ``` ``` ``` return dist, yshift ``` ``` ``` ``` ``` ```#@profile ``` ```def _extract_shape(idx, x, j, cur_center): ``` ``` """ ``` ``` >>> _extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4])) ``` ``` array([-1., 0., 1.]) ``` ``` >>> _extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4])) ``` ``` array([-0.96836405, 1.02888681, -0.06052275]) ``` ``` >>> _extract_shape(np.array([1,0,1,0]), np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 0, np.array([0,0,0,0])) ``` ``` array([-1.2089303 , -0.19618238, 0.19618238, 1.2089303 ]) ``` ``` >>> _extract_shape(np.array([0,0,1,0]), np.array([[1,2,3,4],[0,1,2,3],[-1,1,-1,1],[1,2,2,3]]), 0, np.array([-1.2089303,-0.19618238,0.19618238,1.2089303])) ``` ``` array([-1.19623139, -0.26273649, 0.26273649, 1.19623139]) ``` ``` """ ``` ``` _a = [] ``` ``` for i in range(len(idx)): ``` ``` if idx[i] == j: ``` ``` if cur_center.sum() == 0: ``` ``` opt_x = x[i] ``` ``` else: ``` ``` _, opt_x = _sbd(cur_center, x[i]) ``` ``` _a.append(opt_x) ``` ``` a = np.array(_a) ``` ``` ``` ``` if len(a) == 0: ``` ``` return np.zeros((1, x.shape[1])) ``` ``` columns = a.shape[1] ``` ``` y = zscore(a,axis=1,ddof=1) ``` ``` s = np.dot(y.transpose(), y) ``` ``` ``` ``` p = np.empty((columns, columns)) ``` ``` p.fill(1.0/columns) ``` ``` p = np.eye(columns) - p ``` ``` ``` ``` m = np.dot(np.dot(p, s), p) ``` ``` _, vec = eigh(m) ``` ``` centroid = vec[:,-1] ``` ``` finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum()) ``` ``` finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum()) ``` ``` ``` ``` if finddistance1 >= finddistance2: ``` ``` centroid *= -1 ``` ``` ``` ``` return zscore(centroid, ddof=1) ``` ``` ``` ```def _kshape(x, k): ``` ``` """ ``` ``` >>> from numpy.random import seed; seed(0) ``` ``` >>> _kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2) ``` ``` (array([0, 0, 1, 0]), array([[-1.2244258 , -0.35015476, 0.52411628, 1.05046429], ``` ``` [-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]])) ``` ``` """ ``` ``` m = x.shape[0] ``` ``` idx = randint(0, k, size=m) ``` ``` centroids = np.zeros((k,x.shape[1])) ``` ``` distances = np.empty((m, k)) ``` ``` ``` ``` for _ in range(100): ``` ``` old_idx = idx ``` ``` for j in range(k): ``` ``` centroids[j] = _extract_shape(idx, x, j, centroids[j]) ``` ``` ``` ``` for i in range(m): ``` ``` for j in range(k): ``` ``` distances[i,j] = 1 - max(_ncc_c(x[i], centroids[j])) ``` ``` idx = distances.argmin(1) ``` ``` if np.array_equal(old_idx, idx): ``` ``` break ``` ``` ``` ``` return idx, centroids ``` ``` ``` ```def kshape(x, k): ``` ``` idx, centroids = _kshape(np.array(x), k) ``` ``` clusters = [] ``` ``` for i, centroid in enumerate(centroids): ``` ``` series = [] ``` ``` for j, val in enumerate(idx): ``` ``` if i == val: ``` ``` series.append(j) ``` ``` clusters.append((centroid, series)) ``` ``` return clusters ``` ``` ``` ```if __name__ == "__main__": ``` ``` import doctest ``` ``` doctest.testmod() ```