remove dependency on scipy

This commit is contained in:
Jörg Thalheim 2016-05-17 12:23:16 +00:00
parent e0cebc8c4d
commit d3d3cdfa96
1 changed files with 83 additions and 25 deletions

108
kshape.py
View File

@ -1,22 +1,72 @@
import math import math
import numpy as np import numpy as np
from numpy.random import randint from numpy.random import randint, seed
from numpy.linalg import norm, eigh
from numpy.linalg import norm from numpy.linalg import norm
from numpy.fft import fft, ifft from numpy.fft import fft, ifft
from scipy.sparse.linalg import eigs
from scipy.stats import zscore #from scipy.linalg import eigh
from scipy.ndimage.interpolation import shift
def zscore(a, axis=0, ddof=0):
a = np.asanyarray(a)
mns = a.mean(axis=axis)
sstd = a.std(axis=axis, ddof=ddof)
if axis and mns.ndim < a.ndim:
return ((a - np.expand_dims(mns, axis=axis)) /
np.expand_dims(sstd,axis=axis))
else:
return (a - mns) / sstd
def roll_zeropad(a, shift, axis=None):
a = np.asanyarray(a)
if shift == 0: return a
if axis is None:
n = a.size
reshape = True
else:
n = a.shape[axis]
reshape = False
if np.abs(shift) > n:
res = np.zeros_like(a)
elif shift < 0:
shift += n
zeros = np.zeros_like(a.take(np.arange(n-shift), axis))
res = np.concatenate((a.take(np.arange(n-shift,n), axis), zeros), axis)
else:
zeros = np.zeros_like(a.take(np.arange(n-shift,n), axis))
res = np.concatenate((zeros, a.take(np.arange(n-shift), axis)), axis)
if reshape:
return res.reshape(a.shape)
else:
return res
# TODO vectorized version of _ncc_c
#def _ncc_c(x,y):
# """
# >>> _ncc_c(np.array([[1,2,3,4]]), np.array([[1,2,3,4]]))
# array([[ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667,
# 0.36666667, 0.13333333]])
# >>> _ncc_c(np.array([[1,1,1]]), np.array([[1,1,1]]))
# array([[ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]])
# >>> _ncc_c(np.array([[1,2,3]]), np.array([[-1,-1,-1]]))
# array([[-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]])
# """
# x_len = x.shape[1]
# fft_size = 1<<(2*x_len-1).bit_length()
# cc = ifftn(fftn(x, (fft_size,)) * np.conj(fftn(y, (fft_size,))))
# cc = np.concatenate((cc[:, -(x_len-1):], cc[:, :x_len]), axis=1)
# return np.real(cc) / (norm(x) * norm(y))
def _ncc_c(x,y): def _ncc_c(x,y):
""" """
>>> ncc_c([1,2,3,4], [1,2,3,4]) >>> _ncc_c([1,2,3,4], [1,2,3,4])
array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667, array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667,
0.36666667, 0.13333333]) 0.36666667, 0.13333333])
>>> ncc_c([1,1,1], [1,1,1]) >>> _ncc_c([1,1,1], [1,1,1])
array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333]) array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333])
>>> ncc_c([1,2,3], [-1,-1,-1]) >>> _ncc_c([1,2,3], [-1,-1,-1])
array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005]) array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
""" """
x_len = len(x) x_len = len(x)
@ -28,25 +78,32 @@ def _ncc_c(x,y):
def _sbd(x, y): def _sbd(x, y):
""" """
>>> sbd([1,1,1], [1,1,1]) >>> _sbd([1,1,1], [1,1,1])
(-2.2204460492503131e-16, array([1, 1, 1])) (-2.2204460492503131e-16, array([1, 1, 1]))
>>> sbd([0,1,2], [1,2,3]) >>> _sbd([0,1,2], [1,2,3])
(0.043817112532485103, array([1, 2, 3])) (0.043817112532485103, array([1, 2, 3]))
>>> sbd([1,2,3], [0,1,2]) >>> _sbd([1,2,3], [0,1,2])
(0.043817112532485103, array([0, 1, 2])) (0.043817112532485103, array([0, 1, 2]))
""" """
ncc = _ncc_c(x, y) ncc = _ncc_c(x, y)
idx = ncc.argmax() idx = ncc.argmax()
dist = 1 - ncc[idx] dist = 1 - ncc[idx]
yshift = shift(y, (idx + 1) - max(len(x), len(y))) yshift = roll_zeropad(y, (idx + 1) - max(len(x), len(y)))
return dist, yshift return dist, yshift
#@profile
def _extract_shape(idx, x, j, cur_center): def _extract_shape(idx, x, j, cur_center):
""" """
>>> extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4])) >>> _extract_shape(np.array([0,1,2]), np.array([[1,2,3], [4,5,6]]), 1, np.array([0,3,4]))
array([ -1.00000000e+00, -3.06658683e-19, 1.00000000e+00]) array([-1., 0., 1.])
>>> extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4])) >>> _extract_shape(np.array([0,1,2]), np.array([[-1,2,3], [4,-5,6]]), 1, np.array([0,3,4]))
array([-0.96836405, 1.02888681, -0.06052275]) array([-0.96836405, 1.02888681, -0.06052275])
>>> _extract_shape(np.array([1,0,1,0]), np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 0, np.array([0,0,0,0]))
array([-1.2089303 , -0.19618238, 0.19618238, 1.2089303 ])
>>> _extract_shape(np.array([0,0,1,0]), np.array([[1,2,3,4],[0,1,2,3],[-1,1,-1,1],[1,2,2,3]]), 0, np.array([-1.2089303,-0.19618238,0.19618238,1.2089303]))
array([-1.19623139, -0.26273649, 0.26273649, 1.19623139])
""" """
_a = [] _a = []
for i in range(len(idx)): for i in range(len(idx)):
@ -69,39 +126,40 @@ def _extract_shape(idx, x, j, cur_center):
p = np.eye(columns) - p p = np.eye(columns) - p
m = np.dot(np.dot(p, s), p) m = np.dot(np.dot(p, s), p)
_, vec = eigs(m, 1) _, vec = eigh(m)
centroid = np.real(vec[:,0]) centroid = vec[:,-1]
finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum()) finddistance1 = math.sqrt(((a[0] - centroid) ** 2).sum())
finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum()) finddistance2 = math.sqrt(((a[0] + centroid) ** 2).sum())
if finddistance1 >= finddistance2: if finddistance1 >= finddistance2:
centroid *= -1 centroid *= -1
return zscore(centroid, ddof=1)
return zscore(centroid, ddof=1)
def _kshape(x, k): def _kshape(x, k):
""" """
>>> kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2) >>> from numpy.random import seed; seed(0)
(array([0, 0, 1, 0]), array([[-1.19623139, -0.26273649, 0.26273649, 1.19623139], >>> _kshape(np.array([[1,2,3,4], [0,1,2,3], [-1,1,-1,1], [1,2,2,3]]), 2)
(array([0, 0, 1, 0]), array([[-1.2244258 , -0.35015476, 0.52411628, 1.05046429],
[-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]])) [-0.8660254 , 0.8660254 , -0.8660254 , 0.8660254 ]]))
""" """
m = x.shape[0] m = x.shape[0]
idx = randint(0, k, size=m) idx = randint(0, k, size=m)
centroids = np.zeros((k,x.shape[1])) centroids = np.zeros((k,x.shape[1]))
distances = np.empty((m, k)) distances = np.empty((m, k))
for _ in range(100): for _ in range(100):
old_idx = idx old_idx = idx
for j in range(k): for j in range(k):
res = _extract_shape(idx, x, j, centroids[j]) centroids[j] = _extract_shape(idx, x, j, centroids[j])
centroids[j] = res
for i in range(m): for i in range(m):
for j in range(k): for j in range(k):
distances[i,j] = 1 - max(_ncc_c(x[i], centroids[j])) distances[i,j] = 1 - max(_ncc_c(x[i], centroids[j]))
idx = distances.argmin(1) idx = distances.argmin(1)
if norm(old_idx - idx) == 0: if np.array_equal(old_idx, idx):
break break
return idx, centroids return idx, centroids
def kshape(x, k): def kshape(x, k):