1 /*
2  * Copyright (C) 2015-2018, by Laszlo Szeremi under the Boost license.
3  *
4  * Pixel Perfect Engine, graphics.transformFunctions module
5  */
6 
7 module PixelPerfectEngine.graphics.transformFunctions;
8 
/**
 * Four-lane 32-bit mask for bitwise AND against a 128-bit vector:
 * elements 1 and 3 have every bit set, elements 0 and 2 are cleared.
 * NOTE(review): the name suggests the A and C terms of an [A,B,C,D] vector (elements 0 and 2),
 * yet the lanes that are set are 1 and 3 - confirm the intended lane order before relying on it.
 */
package static immutable uint[4] maskAC = [0x0000_0000, 0xFFFF_FFFF, 0x0000_0000, 0xFFFF_FFFF];
10 
11 import PixelPerfectEngine.system.platform;
12 
13 static if(USE_INTEL_INTRINSICS) import inteli.emmintrin;
14 
15 
/**
 * Main transform function with fixed point arithmetics. Returns the point where the pixel needs to be read from.
 * 256 equals 1.0 (8 fractional bits).
 *
 * The function reads as:
 * [x',y'] = (([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0])) >> 8) + [x_0,y_0]
 * where the shift is arithmetic (sign preserving), so results are rounded towards negative infinity.
 *
 * Params:
 *   xy = Screen coordinates. x:0 y:1
 *   ABCD = Transform matrix in 8.8 fixed point:
 *          A/0: Horizontal scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 *          B/1: Horizontal shearing. 0 means no shearing at all.
 *          C/2: Vertical shearing. 0 means no shearing at all.
 *          D/3: Vertical scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 *   x0y0 = Origin point. x_0:0 y_0:1
 *   sXsY = Scrolling point. sX:0 sY:1
 *
 * Returns: The source coordinates [x', y'].
 */
public @nogc int[2] transformFunctionInt(short[2] xy, short[4] ABCD, short[2] x0y0, short[2] sXsY) pure nothrow @trusted {
	// Offset of the scrolled screen position from the origin; the short operands are promoted to int.
	const int dX = xy[0] + sXsY[0] - x0y0[0];
	const int dY = xy[1] + sXsY[1] - x0y0[1];
	int[2] result;
	// `>>` is the arithmetic shift: negative products stay negative.
	// (`>>>` would shift in zeroes and turn every negative sum into a huge positive coordinate.)
	result[0] = ((ABCD[0] * dX + ABCD[1] * dY) >> 8) + x0y0[0];
	result[1] = ((ABCD[2] * dX + ABCD[3] * dY) >> 8) + x0y0[1];
	return result;
}
/**
 * Relative rotation clockwise by given degrees. Returns the new transform points.
 *
 * Each element of the input is multiplied by the matching entry of
 * [cos(theta), sin(theta), -sin(theta), cos(theta)] and rounded to the nearest integer.
 *
 * Params:
 *   theta = Degrees of clockwise rotation.
 *   input = The transform points at 0 degrees.
 *
 * Returns: The transform points [A,B,C,D] after the rotation.
 */
public @nogc short[4] rotateFunction(double theta, short[4] input = [256,256,256,256]){
	import std.math : PI, cos, sin, round;
	const double radians = theta * (PI / 180);
	// Evaluate each trigonometric function only once.
	const double cosine = cos(radians);
	const double sine = sin(radians);
	short[4] transformPoints;
	// Round to nearest: a plain cast truncates towards zero, which biases every coefficient
	// and loses a whole unit whenever the product lands a hair below an integer.
	transformPoints[0] = cast(short)round(input[0] * cosine);
	transformPoints[1] = cast(short)round(input[1] * sine);
	transformPoints[2] = cast(short)round(input[2] * -sine);
	transformPoints[3] = cast(short)round(input[3] * cosine);
	return transformPoints;
}
/**
 * Main transform function, returns the point where the pixel needs to be read from.
 *
 * The function reads as:
 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0]
 * The sum is converted to int by truncation towards zero.
 * Written in plain D, so it compiles on every target.
 *
 * Params:
 *   xy = Screen coordinates. x:0 y:1
 *   ABCD = Transform matrix:
 *          A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 *          B/1: Horizontal shearing. 0 means no shearing at all.
 *          C/2: Vertical shearing. 0 means no shearing at all.
 *          D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 *   x0y0 = Origin point. x_0:0/2 y_0:1/3
 *   sXsY = Scrolling point. sX:0/2 sY:1/3
 *
 * Returns: The source coordinates [x', y'].
 */
public @nogc int[2] transformFunctionFP(int[2] xy, float[4] ABCD, float[4] x0y0, int[4] sXsY){
	// Offsets from the origin. The x' row reads elements 0/1 of the origin and scroll vectors,
	// the y' row reads elements 2/3, which are expected to hold the same pair a second time.
	const float rowXdX = cast(float)xy[0] + cast(float)sXsY[0] - x0y0[0];
	const float rowXdY = cast(float)xy[1] + cast(float)sXsY[1] - x0y0[1];
	const float rowYdX = cast(float)xy[0] + cast(float)sXsY[2] - x0y0[2];
	const float rowYdY = cast(float)xy[1] + cast(float)sXsY[3] - x0y0[3];
	int[2] result;
	// Matrix product plus origin; the cast truncates towards zero.
	result[0] = cast(int)(ABCD[0] * rowXdX + ABCD[1] * rowXdY + x0y0[0]);
	result[1] = cast(int)(ABCD[2] * rowYdX + ABCD[3] * rowYdY + x0y0[1]);
	return result;
}
/**
 * Reverse transform function, returns the point where a given texel needs to be written.
 *
 * It undoes the forward transform
 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0]
 * by solving it for [x,y]:
 * [x,y] = [A,B,C,D]^-1 * ([x',y'] - [x_0,y_0]) - [sX,sY] + [x_0,y_0]
 * with [A,B,C,D]^-1 = [D,-B,-C,A] / (A*D - B*C).
 * The result is converted to int by truncation towards zero.
 * NOTE(review): the formula was derived from the forward transform - confirm that this is
 * the mapping the callers expect.
 *
 * Params:
 *   xy = Texel coordinates [x',y'], as produced by the forward transform. x:0 y:1
 *   ABCD = Matrix of the forward transform:
 *          A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 *          B/1: Horizontal shearing. 0 means no shearing at all.
 *          C/2: Vertical shearing. 0 means no shearing at all.
 *          D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 *   x0y0 = Origin point. x_0:0 y_0:1 (elements 2 and 3 are not read)
 *   sXsY = Scrolling point. sX:0 sY:1 (elements 2 and 3 are not read)
 *
 * Returns: The point [x,y]; [int.min, int.min] if the matrix cannot be inverted.
 */
public @nogc int[2] reverseTransformFunctionFP(int[2] xy, float[4] ABCD, int[4] x0y0, float[4] sXsY){
	int[2] result;
	const float det = ABCD[0] * ABCD[3] - ABCD[1] * ABCD[2];
	// A zero (or NaN) determinant means the forward transform collapses the plane, so no
	// unique source point exists. int.min is the value an SSE float-to-int conversion yields
	// for a non-finite input.
	if(det == 0 || det != det){
		result[0] = int.min;
		result[1] = int.min;
		return result;
	}
	// Position relative to the origin. The origin is an int vector, so subtract before converting.
	const float relX = cast(float)(xy[0] - x0y0[0]);
	const float relY = cast(float)(xy[1] - x0y0[1]);
	// Apply the inverse matrix, then take the scrolling out and put the origin back.
	const float srcX = (ABCD[3] * relX - ABCD[1] * relY) / det - sXsY[0] + cast(float)x0y0[0];
	const float srcY = (ABCD[0] * relY - ABCD[2] * relX) / det - sXsY[1] + cast(float)x0y0[1];
	result[0] = cast(int)srcX;
	result[1] = cast(int)srcY;
	return result;
}