SIMD für Stufe 1 der FFT
This commit is contained in:
parent
818574c62e
commit
04828296f7
@ -126,6 +126,7 @@ int fix_fft_org(fixed fr[], fixed fi[], int m, int inverse)
|
||||
wr >>= 1;
|
||||
wi >>= 1;
|
||||
}
|
||||
|
||||
for(i=m; i<n; i+=istep)
|
||||
{
|
||||
|
||||
@ -148,6 +149,7 @@ int fix_fft_org(fixed fr[], fixed fi[], int m, int inverse)
|
||||
fi[i] = qi + ti;
|
||||
}
|
||||
}
|
||||
|
||||
--k;
|
||||
l = istep;
|
||||
}
|
||||
|
49
fft.c
49
fft.c
@ -112,26 +112,41 @@ int fix_fft(fixed fr[], fixed fi[], int m, int inverse)
|
||||
istep = l << 1; //step width of current butterfly
|
||||
|
||||
FFT_reg reg;
|
||||
FFT_reg_simd simd_r, simd_i;
|
||||
fixed *reg_s = ((fixed*) &reg);
|
||||
|
||||
for(m=0; m<n; m+=istep)
|
||||
if(l == 1)
|
||||
{
|
||||
for(i=m; i<m+l; ++i)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
reg_s[3] = fr[i];
|
||||
reg_s[2] = fr[j];
|
||||
reg_s[1] = fi[i];
|
||||
reg_s[0] = fi[j];
|
||||
|
||||
FFT_calc(reg, i << k, (xtbool) shift, inverse);
|
||||
|
||||
fr[i] = reg_s[3];
|
||||
fr[j] = reg_s[2];
|
||||
fi[i] = reg_s[1];
|
||||
fi[j] = reg_s[0];
|
||||
}
|
||||
for(i=0; i<n; i+=8)
|
||||
{
|
||||
simd_r = FFT_simd_load(fr, i);
|
||||
simd_i = FFT_simd_load(fi, i);
|
||||
FFT_simd_first(simd_r, simd_i, (xtbool) shift);
|
||||
FFT_simd_store(fr, i, simd_r);
|
||||
FFT_simd_store(fi, i, simd_i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(m=0; m<n; m+=istep)
|
||||
{
|
||||
for(i=m; i<m+l; ++i)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
reg_s[3] = fr[i];
|
||||
reg_s[2] = fr[j];
|
||||
reg_s[1] = fi[i];
|
||||
reg_s[0] = fi[j];
|
||||
|
||||
FFT_calc(reg, i << k, (xtbool) shift, inverse);
|
||||
|
||||
fr[i] = reg_s[3];
|
||||
fr[j] = reg_s[2];
|
||||
fi[i] = reg_s[1];
|
||||
fi[j] = reg_s[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
--k;
|
||||
l = istep;
|
||||
|
77
fft_inst.tie
77
fft_inst.tie
@ -132,6 +132,62 @@ table SIN_wave 16 1024 {
|
||||
|
||||
regfile FFT_reg 64 2 fftv
|
||||
|
||||
regfile FFT_reg_simd 128 2 fftsv
|
||||
|
||||
// Variable left shift of a 32-bit value.
// Returns `data` shifted left by `sh` bit positions (0..15): the vacated low
// bits are filled with zeros and the bits shifted past bit 31 are dropped.
// Built as a 16-input TIEmux with `sh` as the selector; the inputs are listed
// in order of increasing shift amount, starting with the unshifted value.
function [31:0] FFT_var_shift([31:0] data, [3:0] sh)
|
||||
{
|
||||
assign FFT_var_shift = TIEmux(sh,
|
||||
data[31:0],
|
||||
{data[30:0], 1'b0},
|
||||
{data[29:0], 2'b0},
|
||||
{data[28:0], 3'b0},
|
||||
{data[27:0], 4'b0},
|
||||
{data[26:0], 5'b0},
|
||||
{data[25:0], 6'b0},
|
||||
{data[24:0], 7'b0},
|
||||
{data[23:0], 8'b0},
|
||||
{data[22:0], 9'b0},
|
||||
{data[21:0], 10'b0},
|
||||
{data[20:0], 11'b0},
|
||||
{data[19:0], 12'b0},
|
||||
{data[18:0], 13'b0},
|
||||
{data[17:0], 14'b0},
|
||||
{data[16:0], 15'b0}
|
||||
);
|
||||
}
|
||||
|
||||
// Loads one 128-bit memory word into an FFT_reg_simd register as eight
// 16-bit lanes.
//   base   - start address of the array
//   offset - added to base after being shifted left by one bit (offset * 2);
//            presumably an index of 16-bit elements - TODO confirm against callers
//   data   - destination register
// The lane order is reversed on the way in: bits [15:0] of the memory word
// (o1) become the most significant lane of `data`, bits [127:112] (o8) the
// least significant one.
operation FFT_simd_load {in AR *base, in AR offset, out FFT_reg_simd data} {out VAddr, in MemDataIn128}
|
||||
{
|
||||
// Address = base + (offset << 1), carry-in 0.
assign VAddr = TIEadd(base, {offset[30:0], 1'b0}, 1'b0);
|
||||
|
||||
wire [15:0] o1 = MemDataIn128[15:0];
|
||||
wire [15:0] o2 = MemDataIn128[31:16];
|
||||
wire [15:0] o3 = MemDataIn128[47:32];
|
||||
wire [15:0] o4 = MemDataIn128[63:48];
|
||||
wire [15:0] o5 = MemDataIn128[79:64];
|
||||
wire [15:0] o6 = MemDataIn128[95:80];
|
||||
wire [15:0] o7 = MemDataIn128[111:96];
|
||||
wire [15:0] o8 = MemDataIn128[127:112];
|
||||
|
||||
// Concatenation puts o1 in the top lane, so the lane order is reversed.
assign data = {o1, o2, o3, o4, o5, o6, o7, o8 };
|
||||
}
|
||||
|
||||
// Stores an FFT_reg_simd register to memory as one 128-bit word of eight
// 16-bit lanes.
//   base   - start address of the array
//   offset - added to base after being shifted left by one bit (offset * 2);
//            presumably an index of 16-bit elements - TODO confirm against callers
//   data   - source register
// The lane order is reversed on the way out: bits [15:0] of `data` (o1) are
// written to the most significant lane of the memory word, bits [127:112]
// (o8) to the least significant one. FFT_simd_load applies the same reversal,
// so a load followed by a store keeps the lanes in their memory order.
operation FFT_simd_store {in AR *base, in AR offset, in FFT_reg_simd data} {out VAddr, out MemDataOut128}
|
||||
{
|
||||
// Address = base + (offset << 1), carry-in 0.
assign VAddr = TIEadd(base, {offset[30:0], 1'b0}, 1'b0);
|
||||
|
||||
wire [15:0] o1 = data[15:0];
|
||||
wire [15:0] o2 = data[31:16];
|
||||
wire [15:0] o3 = data[47:32];
|
||||
wire [15:0] o4 = data[63:48];
|
||||
wire [15:0] o5 = data[79:64];
|
||||
wire [15:0] o6 = data[95:80];
|
||||
wire [15:0] o7 = data[111:96];
|
||||
wire [15:0] o8 = data[127:112];
|
||||
|
||||
// Concatenation puts o1 in the top lane, so the lane order is reversed.
assign MemDataOut128 = {o1, o2, o3, o4, o5, o6, o7, o8 };
|
||||
}
|
||||
|
||||
operation FFT_shift_check {in AR *base, in AR offset, out AR needs_shift} {out VAddr, in MemDataIn128}
|
||||
{
|
||||
assign VAddr = TIEadd(base, offset[31:1], 1'b0);
|
||||
@ -243,6 +299,22 @@ operation FFT_calc {inout FFT_reg data, in AR i, in BR shift, in BR inverse} {}
|
||||
assign data = FFT_butterfly(data, wr, wi, shift);
|
||||
}
|
||||
|
||||
// Computes 4 butterflies at once with a fixed twiddle factor.
//   fr, fi - 128-bit registers, updated in place
//   shift  - selects the twiddle value and is also forwarded to FFT_butterfly
// Each 32-bit slice of fr is paired with the same slice of fi to form the
// 64-bit input of one FFT_butterfly call. The upper 32 bits of each result
// go back into that slice of fr, the lower 32 bits into that slice of fi.
// NOTE(review): the fixed twiddle presumably corresponds to the first FFT
// stage (operation name, wi == 0) - confirm against the caller.
operation FFT_simd_first {inout FFT_reg_simd fr, inout FFT_reg_simd fi, in BR shift} {}
|
||||
{
|
||||
|
||||
// wr is 16'h7fff when shift is 0 and 16'h3fff when shift is 1.
wire [15:0] wr = TIEmux(shift, 16'h7fff, 16'h3fff);
|
||||
wire [15:0] wi = 16'b0;
|
||||
|
||||
wire [63:0] res1 = FFT_butterfly({fr[127:96], fi[127:96]}, wr, wi, shift);
|
||||
wire [63:0] res2 = FFT_butterfly({fr[95:64], fi[95:64]}, wr, wi, shift);
|
||||
wire [63:0] res3 = FFT_butterfly({fr[63:32], fi[63:32]}, wr, wi, shift);
|
||||
wire [63:0] res4 = FFT_butterfly({fr[31:0], fi[31:0]}, wr, wi, shift);
|
||||
|
||||
assign fr = { res1[63:32], res2[63:32], res3[63:32], res4[63:32] };
|
||||
assign fi = { res1[31:0], res2[31:0], res3[31:0], res4[31:0] };
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Generated by XPRES v4.0.4
|
||||
@ -321,11 +393,11 @@ slot_opcodes Inst {
|
||||
FFT_bit_reverse, FFT_calc, ld.FFT_reg,
|
||||
mv.FFT_reg, st.FFT_reg }
|
||||
slot_opcodes flix64_0_slot0 {
|
||||
ADD, BEQZ, BGEU, BLTU, BNEZ, L16SI, MOV.N, NOP, S16I, SEXT, SUB, FFT_shift_check }
|
||||
ADD, BEQZ, BGEU, BLTU, BNEZ, L16SI, MOV.N, NOP, S16I, SEXT, SUB, FFT_shift_check, FFT_simd_load, FFT_simd_store }
|
||||
slot_opcodes flix64_0_slot1 {
|
||||
ADD, ADDX2, MOV.N, MOVNEZ, NEG, NOP }
|
||||
slot_opcodes flix64_0_slot2 {
|
||||
ADD, L16SI, NOP, S16I, SUB, FFT_shift_check }
|
||||
ADD, L16SI, NOP, S16I, SUB, FFT_shift_check, FFT_simd_load, FFT_simd_store }
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
@ -335,3 +407,4 @@ slot_opcodes flix64_0_slot2 {
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
ctype FFT_reg 64 64 FFT_reg default
|
||||
ctype FFT_reg_simd 128 128 FFT_reg_simd default
|
||||
|
Loading…
Reference in New Issue
Block a user