1 module PixelPerfectEngine.audio.firFilter;
2 
3 /*
4  * Copyright (C) 2015-2018, by Laszlo Szeremi under the Boost license.
5  *
6  * Pixel Perfect Engine, FIR filter module
7  */
8 
import PixelPerfectEngine.system.etc;

version(LDC){
	import inteli.emmintrin;
	import core.stdc.stdlib;
	import core.stdc.string;
}
/**
 * Holds the values of a finite impulse response.
 * L is the number of values, and must satisfy isPowerOf2.
 */
struct FiniteImpulseResponse(int L) if(isPowerOf2(L)){
	short[L] vals;	///The L values of the impulse response.
}
/**
 * Implements a finite impulse response filter.
 *
 * Every call to calculate() pushes one sample into a circular delay line of L samples and
 * returns the sum of impulseResponse.vals[k] * x[n - k] for k = 0 .. L - 1, where x[n] is the
 * sample that was just pushed and x[n - k] is the sample pushed k calls earlier (zero if no
 * such call happened yet). The sum is accumulated in a 32 bit integer without scaling or
 * saturation.
 */
public struct FiniteImpulseResponseFilter(int L)
		if(isPowerOf2(L)){
	FiniteImpulseResponse!L* impulseResponse;	///Pointer to the impulse response, must be non-null when calculate() is called
	/**
	 * Contains the delay line.
	 * Elements 0 .. L - 1 form the circular buffer. The elements from L onwards mirror the first
	 * (up to eight) elements of the circular buffer, so eight consecutive samples can be read
	 * starting at any position below L without wrapping the index.
	 */
	private short[L + 8] delayLine;
	private uint stepping;						///Number of samples pushed so far, modulo L
	///Bit mask that wraps an index into 0 .. L - 1. An enum rather than a const field, so instances stay assignable.
	private enum uint truncating = L - 1;
	/**
	 * Creates a filter that uses the given impulse response.
	 * Only the pointer is stored, the impulse response itself is not copied.
	 */
	this(FiniteImpulseResponse!L* impulseResponse){
		this.impulseResponse = impulseResponse;
	}
	/**
	 * Pushes input into the delay line and returns the filtered output sample.
	 * Params:
	 *   input = the newest input sample.
	 * Returns: the sum of vals[k] * x[n - k] over all L taps.
	 */
	public @nogc int calculate(short input){
		assert(impulseResponse !is null, "FiniteImpulseResponseFilter: impulseResponse is null");
		const uint head = pushSample(input);
		version(LDC){
			static if(L >= 8){
				const(short)* coeffs = impulseResponse.vals.ptr;
				__m128i acc = _mm_setzero_si128();
				for(uint i ; i < L ; i += 8){
					//Neither address is guaranteed to be 16 byte aligned, so unaligned loads are used.
					const __m128i c = _mm_loadu_si128(cast(const(__m128i)*)(coeffs + i));
					//Reads starting near the end of the circular buffer run into the mirrored tail.
					const __m128i s = _mm_loadu_si128(cast(const(__m128i)*)(delayLine.ptr + ((head + i) & truncating)));
					//Multiplies eight 16 bit pairs and adds neighbouring products into four 32 bit sums.
					acc = _mm_add_epi32(acc, _mm_madd_epi16(c, s));
				}
				return acc.array[0] + acc.array[1] + acc.array[2] + acc.array[3];
			}else{
				//Fewer than eight taps: an eight wide load would read past the coefficients.
				return convolveScalar(head);
			}
		}else{
			return convolveScalar(head);
		}
	}
	/**
	 * Stores input as the newest sample and advances the write position.
	 * The write position moves backwards through the circular buffer, so the sample pushed k
	 * calls earlier is always found k elements after the returned position (modulo L).
	 * Returns: the index of the sample that was just written.
	 */
	private @nogc uint pushSample(short input){
		const uint head = (L - stepping) & truncating;
		delayLine[head] = input;
		if(head < 8){
			//Keeps the mirrored tail in sync with the start of the circular buffer.
			delayLine[L + head] = input;
		}
		stepping = (stepping + 1) & truncating;
		return head;
	}
	/**
	 * Portable tap-by-tap evaluation of the filter.
	 * Params:
	 *   head = index of the newest sample inside the circular buffer.
	 */
	private @nogc int convolveScalar(uint head){
		int result;
		foreach(i ; 0 .. L){
			result += delayLine[(head + i) & truncating] * impulseResponse.vals[i];
		}
		return result;
	}
}