1 module pixelperfectengine.audio.base.func;
2 
3 /*
4  * Copyright (C) 2015-2021, by Laszlo Szeremi under the Boost license.
5  *
6  * Pixel Perfect Engine, audio.base.func module.
7  *
8  * Contains common audio functions for mixing, codecs, etc.
9  */
10 
11 import inteli.emmintrin;
12 import bitleveld.reinterpret;
13 import bitleveld.datatypes;
14 
15 import std.math;
16 
17 
18 public import pixelperfectengine.audio.base.types;
19 import pixelperfectengine.system.etc;
20 
21 ///Constant for fast integer to floating point conversion
22 package immutable __m128 CONV_RATIO_RECIPROCAL = __m128(-1.0 / ushort.min);
23 ///For IMA ADPCM
24 ///Needs less storage at the cost of worse quality
25 package static immutable byte[4] ADPCM_INDEX_TABLE_2BIT = 
26 			[-1, 2,
27 			 -1, 2];
28 ///For IMA ADPCM
29 ///Needs less storage at the cost of worse quality
30 package static immutable byte[8] ADPCM_INDEX_TABLE_3BIT = 
31 			[-1, -1, 2, 4,
32 			 -1, -1, 2, 4,];
33 ///For IMA and Dialogic ADPCM
34 ///Standard quality and size
35 package static immutable byte[16] ADPCM_INDEX_TABLE_4BIT = 
36 			[-1, -1, -1, -1, 2, 4, 6, 8, 
37 			 -1, -1, -1, -1, 2, 4, 6, 8];	
38 ///For IMA ADPCM
39 ///Better quality, but needs more storage
40 package static immutable byte[32] ADPCM_INDEX_TABLE_5BIT = 
41 			[-1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16
42 			 -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16];
43 ///For the Yamaha ADPCM A found in YM2610 and probably other chips
44 package static immutable byte[16] Y_ADPCM_INDEX_TABLE =
45 			[-1, -1, -1, -1, 2, 5, 7, 9, 
46 			 -1, -1, -1, -1, 2, 5, 7, 9];
47 ///Most OKI and Yamaha chips seems to use this step-table
48 package static immutable ushort[49] DIALOGIC_ADPCM_STEP_TABLE = 
49 			[16, 17, 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55,
50 			60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157, 173, 190,	
51 			209, 230, 253, 279, 307, 337, 371, 408, 449, 494, 544, 598,
52 			658, 724, 796, 876, 963, 1060, 1166, 1282, 1411, 1552];		
53 /** 
54  * Used by IMA ADPCM and its derivatives.
55  */
56 package static immutable ushort[89] IMA_ADPCM_STEP_TABLE = 
57 			[7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 
58 			19, 21, 23, 25, 28, 31, 34, 37, 41, 45, 
59 			50, 55, 60, 66, 73, 80, 88, 97, 107, 118, 
60 			130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
61 			337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
62 			876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066, 
63 			2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
64 			5894, 6484, 7132, 7845, 8630, 9493, 10_442, 11_487, 12_635, 13_899, 
65 			15_289, 16_818, 18_500, 20_350, 22_385, 24_623, 27_086, 29_794, 32_767];
66 /**
67  * Used for decoding Mu-Law encoded PCM samples
68  */
69 package static immutable short[256] MU_LAW_DECODER_TABLE =
70 			[-32_124,-31_100,-30_076,-29_052,-28_028,-27_004,-25_980,-24_956,
71 			-23_932,-22_908,-21_884,-20_860,-19_836,-18_812,-17_788,-16_764,
72 			-15_996,-15_484,-14_972,-14_460,-13_948,-13_436,-12_924,-12_412,
73 			-11_900,-11_388,-10_876,-10_364, -9852, -9340, -8828, -8316,
74 			-7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
75 			-5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
76 			-3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
77 			-2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
78 			-1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
79 			-1372, -1308, -1244, -1180, -1116, -1052,  -988,  -924,
80 			-876,  -844,  -812,  -780,  -748,  -716,  -684,  -652,
81 			-620,  -588,  -556,  -524,  -492,  -460,  -428,  -396,
82 			-372,  -356,  -340,  -324,  -308,  -292,  -276,  -260,
83 			-244,  -228,  -212,  -196,  -180,  -164,  -148,  -132,
84 			-120,  -112,  -104,   -96,   -88,   -80,   -72,   -64,
85 			-56,   -48,   -40,   -32,   -24,   -16,    -8,     -1,
86 			32_124, 31_100, 30_076, 29_052, 28_028, 27_004, 25_980, 24_956,
87 			23_932, 22_908, 21_884, 20_860, 19_836, 18_812, 17_788, 16_764,
88 			15_996, 15_484, 14_972, 14_460, 13_948, 13_436, 12_924, 12_412,
89 			11_900, 11_388, 10_876, 10_364,  9852,  9340,  8828,  8316,
90 			7932,  7676,  7420,  7164,  6908,  6652,  6396,  6140,
91 			5884,  5628,  5372,  5116,  4860,  4604,  4348,  4092,
92 			3900,  3772,  3644,  3516,  3388,  3260,  3132,  3004,
93 			2876,  2748,  2620,  2492,  2364,  2236,  2108,  1980,
94 			1884,  1820,  1756,  1692,  1628,  1564,  1500,  1436,
95 			1372,  1308,  1244,  1180,  1116,  1052,   988,   924,
96 			876,   844,   812,   780,   748,   716,   684,   652,
97 			620,   588,   556,   524,   492,   460,   428,   396,
98 			372,   356,   340,   324,   308,   292,   276,   260,
99 			244,   228,   212,   196,   180,   164,   148,   132,
100 			120,   112,   104,    96,    88,    80,    72,    64,
101 			56,    48,    40,    32,    24,    16,     8,     0];
102 /**
103  * Used for decoding A-Law encoded PCM streams.
104  */
105 package static immutable short[256] A_LAW_DECODER_TABLE = 
106 			[-5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736,
107 			-7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784,
108 			-2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368,
109 			-3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392,
110 			-22_016,-20_992,-24_064,-23_040,-17_920,-16_896,-19_968,-18_944,
111 			-30_208,-29_184,-32_256,-31_232,-26_112,-25_088,-28_160,-27_136,
112 			-11_008,-10_496,-12_032,-11_520,-8960, -8448, -9984, -9472,
113 			-15_104,-14_592,-16_128,-15_616,-13_056,-12_544,-14_080,-13_568,
114 			-344,  -328,  -376,  -360,  -280,  -264,  -312,  -296,
115 			-472,  -456,  -504,  -488,  -408,  -392,  -440,  -424,
116 			-88,   -72,   -120,  -104,  -24,   -8,    -56,   -40,
117 			-216,  -200,  -248,  -232,  -152,  -136,  -184,  -168,
118 			-1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184,
119 			-1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696,
120 			-688,  -656,  -752,  -720,  -560,  -528,  -624,  -592,
121 			-944,  -912,  -1008, -976,  -816,  -784,  -880,  -848,
122 			5504,  5248,  6016,  5760,  4480,  4224,  4992,  4736,
123 			7552,  7296,  8064,  7808,  6528,  6272,  7040,  6784,
124 			2752,  2624,  3008,  2880,  2240,  2112,  2496,  2368,
125 			3776,  3648,  4032,  3904,  3264,  3136,  3520,  3392,
126 			22_016, 20_992, 24_064, 23_040, 17_920, 16_896, 19_968, 18_944,
127 			30_208, 29_184, 32_256, 31_232, 26_112, 25_088, 28_160, 27_136,
128 			11_008, 10_496, 12_032, 11_520, 8960,  8448,  9984,  9472,
129 			15_104, 14_592, 16_128, 15_616, 13_056, 12_544, 14_080, 13_568,
130 			344,   328,   376,   360,   280,   264,   312,   296,
131 			472,   456,   504,   488,   408,   392,   440,   424,
132 			88,    72,   120,   104,    24,     8,    56,    40,
133 			216,   200,   248,   232,   152,   136,   184,   168,
134 			1376,  1312,  1504,  1440,  1120,  1056,  1248,  1184,
135 			1888,  1824,  2016,  1952,  1632,  1568,  1760,  1696,
136 			688,   656,   752,   720,   560,   528,   624,   592,
137 			944,   912,  1008,   976,   816,   784,   880,   848];
138 
139 alias ADPCMStream = NibbleArray;
140 
141 
142 @nogc nothrow pure:
143 	
144 	/**
145 	 * Mixes an audio stream to the destination.
146 	 */
147 	public void mixIntoStream(size_t length, float* src, float* dest, float amount = 1.0, float corr = 0.5) {
148 		const __m128 amountV = __m128(amount);
149 		const __m128 corrV = __m128(corr);
150 		while (length) {
151 			const __m128 srcV = _mm_load_ps(src);
152 			__m128 destV = _mm_load_ps(dest);
153 			destV += srcV * amountV;
154 			destV *= corrV;
155 			_mm_store_ps(dest, destV);
156 			length -= 4;
157 			src += 4;
158 			dest += 4;
159 		}
160 	}
161 	/**
162 	 * Interleaves two channels.
163 	 * `dest` must be as big as the length of `srcL` and `srcR`.
164 	 */
165 	public void interleave(size_t length, float* srcL, float* srcR, float* dest) {
166 		while (length) {
167 			dest[0] = *srcL;
168 			dest[1] = *srcR;
169 			dest += 2;
170 			srcL++;
171 			srcR++;
172 			length--;
173 		}
174 	}
175 	/**
176 	 * Converts a 32 bit extended integer stream to 32 bit floating point.
177 	 */
178 	public void convExIntToFlt(size_t length, int* src, float* dest) {
179 		while (length) {
180 			_mm_store_ps(dest, _mm_cvtepi32_ps(_mm_load_si128(cast(__m128i*)src)) * CONV_RATIO_RECIPROCAL);
181 			length -= 4;
182 			src += 4;
183 			dest += 4;
184 		}
185 	}
186 	/**
187 	 * Decodes an amount of 8 bit unsigned PCM to extended 32 bit.
188 	 * Amount is decided by dest.length. `src` is a full waveform. Position is stored in wp.pos.
189 	 */
190 	public void decode8bitPCM(const(ubyte)[] src, int[] dest, ref DecoderWorkpad wp) @safe {
191 		for (size_t i ; i < dest.length ; i++) {
192 			const ubyte val = src[wp.pos + i];
193 			dest[i] = (val + val<<8) + short.min;
194 		}
195 		wp.pos += dest.length;
196 	}
197 	/**
198 	 * Decodes an amount of 16 bit signed PCM to extended 32 bit.
199 	 * Amount is decided by dest.length. `src` is a full waveform. Position is stored in wp.pos.
200 	 */
201 	public void decode16bitPCM(const(short)[] src, int[] dest, ref DecoderWorkpad wp) @safe {
202 		for (size_t i ; i < dest.length ; i++) {
203 			dest[i] = src[wp.pos + i];
204 		}
205 		wp.pos += dest.length;
206 	}
207 	/**
208 	 * Decodes an amount of 4 bit IMA ADPCM stream to extended 32 bit.
209 	 * Amount is decided by dest.length. `src` is a full waveform. Position is stored in wp.pos.
210 	 */
211 	public void decode4bitIMAADPCM(ADPCMStream src, int[] dest, ref DecoderWorkpad wp) @safe {
212 		for (size_t i ; i < dest.length ; i++) {
213 			const ubyte index = src[i];
214 			uint stepSize;
215 			int d_n;
216 			wp.pred += ADPCM_INDEX_TABLE_4BIT[index];
217 			clamp(wp.pred, 0, 88);
218 			stepSize = IMA_ADPCM_STEP_TABLE[wp.pred];
219 			d_n = ((stepSize) * (index & 0b0100)>>2) + ((stepSize>>1) * (index & 0b0010)>>1) + ((stepSize>>2) * index & 0b0001) 
220 					+ (stepSize>>3);
221 			if(index & 0b1000)
222 				d_n *= -1;
223 			d_n += wp.outn1;
224 			dest[i] = d_n;
225 			wp.outn1 = d_n;
226 		}
227 		wp.pos += dest.length;
228 	}
229 	/**
230 	 * Decodes an amount of 4 bit Oki/Dialogic ADPCM stream to extended 32 bit.
231 	 * Amount is decided by dest.length. `src` is a full waveform. Position is stored in wp.pos.
232 	 */
233 	public void decode4bitDialogicADPCM(ADPCMStream src, int[] dest, ref DecoderWorkpad wp) @safe {
234 		for (size_t i ; i < dest.length ; i++) {
235 			const ubyte index = src[i];
236 			uint stepSize;
237 			int d_n;
238 			wp.pred += ADPCM_INDEX_TABLE_4BIT[index];
239 			clamp(wp.pred, 0, 48);
240 			stepSize = DIALOGIC_ADPCM_STEP_TABLE[wp.pred];
241 			d_n = ((stepSize) * (index & 0b0100)>>2) + ((stepSize>>1) * (index & 0b0010)>>1) + ((stepSize>>2) * index & 0b0001) 
242 					+ (stepSize>>3);
243 			if(index & 0b1000)
244 				d_n *= -1;
245 			d_n += wp.outn1;
246 			dest[i] = (d_n<<4) + (d_n>>8) + short.min;
247 			wp.outn1 = d_n;
248 		}
249 		wp.pos += dest.length;
250 	}
251 	/**
252 	 * Decodes a Mu-Law encoded stream.
253 	 */
254 	public void decodeMuLawStream(const(ubyte)[]src, int[] dest, ref DecoderWorkpad wp) @safe {
255 		for (size_t i ; i < dest.length ; i++) {
256 			dest[i] = MU_LAW_DECODER_TABLE[src[wp.pos + i]];
257 		}
258 		wp.pos += dest.length;
259 	}
260 	/**
261 	 * Decodes an A-Law encoded stream.
262 	 */
263 	public void decodeALawStream(const(ubyte)[]src, int[] dest, ref DecoderWorkpad wp) @safe {
264 		for (size_t i ; i < dest.length ; i++) {
265 			dest[i] = A_LAW_DECODER_TABLE[src[wp.pos + i]];
266 		}
267 		wp.pos += dest.length;
268 	}
269 	/**
270 	 * Streches a buffer to the given amount using no interpolation.
271 	 * Amount decided by `dest.length`.
272 	 * Can be used to pitch the sample.
273 	 */
274 	public void stretchAudioNoIterpol(const(int)[] src, int[] dest, ref WavemodWorkpad wp, uint modifier = 0x1_00_00_00, 
275 			uint clamping = 0xFF) @safe {
276 		//wp.lookupVal &= 0x_FF_FF_FF;
277 		for (size_t i ; i < dest.length /* && wp.lookupVal>>24 < src.length */ ; i++) {
278 			dest[i] = src[cast(size_t)(wp.lookupVal>>24) & clamping];
279 			wp.lookupVal += modifier;
280 		}
281 	}
282 	/**
283 	 * Converts MIDI note to frequency.
284 	 */
285 	public double midiToFreq(int note, const double baseFreq = 440.0) @safe {
286 		double r = note - 69;
287 		r /= 12;
288 		r = pow(2, r);
289 		return r * baseFreq;
290 	}
291 	/**
292 	 * Converts note number to frequency.
293 	 */
294 	public double noteToFreq(double note, const double baseFreq = 440.0) @safe {
295 		double r = note - 69;
296 		r /= 12;
297 		r = pow(2, r);
298 		return r * baseFreq;
299 	}
300 	/** 
301 	 * Bends the frequency by the given amount of seminotes.
302 	 * Params:
303 	 *   freq = The frequency to be modified.
304 	 *   am = The amount of seminotes. Positive means upwards, negative means downwards.
305 	 * Returns: The modified frequency.
306 	 */
307 	public double bendFreq(double freq, double am) @safe {
308 		return freq * pow(2, am / 12);
309 	}
310 	/**
311 	 * Calculates biquad low-pass filter coefficients from the supplied values.
312 	 *
313 	 * fs: Sampling frequency.
314 	 * f0: Corner frequency.
315 	 * q: The Q factor of the filter.
316 	 */
317 	public BiquadFilterValues createLPF(float fs, float f0, float q) @safe {
318 		BiquadFilterValues result;
319 		const float w0 = 2 * PI * f0 / fs;
320 		const float alpha = sin(w0) / (2 * q);
321 		result.b1 = 1 - cos(w0);
322 		result.b0 = result.b1 / 2;
323 		result.b2 = result.b0;
324 		result.a0 = 1 + alpha;
325 		result.a1 = -2 * cos(w0);
326 		result.a2 = 1 - alpha;
327 		return result;
328 	}
329 	/** 
330 	 * Calculates biquad high-pass filter coefficients from the supplied values.
331 	 * Params:
332 	 *   fs = Sampling frequency.
333 	 *   f0 = Corner frequency.
334 	 *   q = The Q factor of the filter.
335 	 * Returns: A struct with the coefficient values for the filter.
336 	 */
337 	public BiquadFilterValues createHPF(float fs, float f0, float q) @safe {
338 		BiquadFilterValues result;
339 		const float w0 = 2 * PI * f0 / fs;
340 		const float alpha = sin(w0) / (2 * q);
341 		result.b1 = (1 + cos(w0)) * -1;
342 		result.b0 = (1 + cos(w0)) / 2;
343 		result.b2 = result.b0;
344 		result.a0 = 1 + alpha;
345 		result.a1 = -2 * cos(w0);
346 		result.a2 = 1 - alpha;
347 		return result;
348 	}
349 	/** 
350 	 * Calculates biquad band pass filter (constant skirt gain, peak gain = Q) filter coefficients from the supplied values.
351 	 * Params:
352 	 *   fs = Sampling frequency.
353 	 *   f0 = Corner frequency.
354 	 *   q = Peak gain.
355 	 * Returns: A struct with the coefficient values for the filter.
356 	 */
357 	public BiquadFilterValues createBPF0(float fs, float f0, float q) @safe {
358 		BiquadFilterValues result;
359 		const float w0 = 2 * PI * f0 / fs;
360 		const float alpha = sin(w0) / (2 * q);
361 		result.b1 = 0;
362 		result.b0 = q * alpha;
363 		result.b2 = result.b0 * -1;
364 		result.a0 = 1 + alpha;
365 		result.a1 = -2 * cos(w0);
366 		result.a2 = 1 - alpha;
367 		return result;
368 	}
369 	/** 
370 	 * Calculates biquad band pass filter (constant 0 db peak gain) filter coefficients from the supplied values.
371 	 * Params:
372 	 *   fs = Sampling frequency.
373 	 *   f0 = Corner frequency.
374 	 *   q = Peak gain.
375 	 * Returns: A struct with the coefficient values for the filter.
376 	 */
377 	public BiquadFilterValues createBPF1(float fs, float f0, float q) @safe {
378 		BiquadFilterValues result;
379 		const float w0 = 2 * PI * f0 / fs;
380 		const float alpha = sin(w0) / (2 * q);
381 		result.b1 = 0;
382 		result.b0 = alpha;
383 		result.b2 = alpha * -1;
384 		result.a0 = 1 + alpha;
385 		result.a1 = -2 * cos(w0);
386 		result.a2 = 1 - alpha;
387 		return result;
388 	}
389 	public BiquadFilterValues createNotchFilt(float fs, float f0, float q) @safe {
390 		BiquadFilterValues result;
391 		const float w0 = 2 * PI * f0 / fs;
392 		const float alpha = sin(w0) / (2 * q);
393 		result.b1 = 0;
394 		result.b0 = alpha;
395 		result.b2 = alpha * -1;
396 		result.a0 = 1 + alpha;
397 		result.a1 = -2 * cos(w0);
398 		result.a2 = 1 - alpha;
399 		return result;
400 	}
401 	/** 
402 	 * Calculates the time factor for an LP6 filter.
403 	 * Filter formula:
404 	 * `y[n] = y[n-1] + (x[n] - y[n-1]) * factor`
405 	 * Where factor is:
406 	 * `1.0 - exp(-1.0 / (timeConstantInSeconds * samplerate))`
407 	 * Params:
408 	 *   fs = Sampling frequency.
409 	 *   f0 = Cutoff frequency.
410 	 * Returns: The `factor` value.
411 	 */
412 	public double calculateLP6factor(float fs, float f0) @safe {
413 		return 1.0 - exp(-1.0 / ((1 / f0) * fs));
414 	}
415 	/** 
416 	 * Creates the alpha value for a HP20 filter.
417 	 * Filter formula:
418 	 * `y[n] = (y[n-1] + x[n] - x[n-1]) * alpha`
419 	 * Where alpha is:
420 	 * `1 / (1 + 2 * pi * timeConstantInSeconds * samplerate)`
421 	 * Params:
422 	 *   fs = Sampling frequency.
423 	 *   f0 = Cutoff frequency.
424 	 * Returns: The `alpha` value.
425 	 */
426 	public double calculateHP20alpha(float fs, float f0) @safe {
427 		return 1 / (1 + 2 * PI * (1 / f0) * fs);
428 	}
429 	/** 
430 	 * Converts MIDI 1.0 14 bit control values to MIDI 2.0 32 bit. Might not work the best with certain values.
431 	 * Params:
432 	 *   msb = most significant 7 bits
433 	 *   lsb = least significant 7 bits
434 	 * Returns: The 32 bit control value.
435 	 */
436 	public uint convertM1CtrlValToM2(ubyte msb, ubyte lsb) @safe {
437 		const uint addedTotal = msb<<7 | lsb;
438 		return cast(uint)(uint.max * (cast(real)(addedTotal) / (ushort.max>>2)));
439 	}
440 	/** 
441 	 * Sets an array (buffer) to all zeros.
442 	 * Params:
443 	 *   targetBuffer = The buffer to be reset.
444 	 */
445 	public void resetBuffer(T)(ref T[] targetBuffer) @safe {
446 		static if (is(T == __m128)) {
447 			for (size_t i ; i < targetBuffer.length ; i++) {
448 				targetBuffer[i] = __m128(0);
449 			}
450 		} else {
451 			for (size_t i ; i < targetBuffer.length ; i++) {
452 				targetBuffer[i] = 0;
453 			}
454 		}
455 	}
456 	double fastPow(double a, double b) @safe {
457 	    union U {
458 	        double d;
459 	        int[2] x;
460 	    }
461 		U u;
462 		u.d = a;
463 	    u.x[1] = cast(int)(b * (u.x[1] - 1_072_632_447) + 1_072_632_447);
464 	    u.x[0] = 0;
465 	    return u.d;
466 	}