1 /*
2  * Copyright (C) 2015-2018, by Laszlo Szeremi under the Boost license.
3  *
4  * Pixel Perfect Engine, graphics.transformFunctions module
5  */
6 
7 module PixelPerfectEngine.graphics.transformFunctions;
8 
/**
 * 128-bit lane mask that keeps the 32-bit lanes 0 and 2 (the "A" and "C" positions of an
 * [A,B,C,D] vector) and clears lanes 1 and 3 when used with a bitwise AND.
 * Element 0 of the array is the lowest lane of an XMM register, so the set lanes must be at
 * indices 0 and 2.
 * Aligned to 16 bytes because legacy (non-VEX) SSE instructions such as `pand` require their
 * 128-bit memory operand to be 16-byte aligned.
 */
package align(16) static immutable uint[4] maskAC = [uint.max, 0, uint.max, 0];
10 
11 
/**
 * Main transform function with fixed point arithmetics. Returns the point where the pixel is needed to be read from.
 * 256 equals with 1.
 * The function reads as:
 * [x',y'] = (([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0])) >> 8) + [x_0,y_0]
 * where >> is an arithmetic (sign preserving) shift, so negative offsets and negative
 * (mirroring) scale values produce negative results instead of wrapping around.
 * ABCD:
 * A/0: Horizontal scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 * B/1: Horizontal shearing. 0 means no shearing at all.
 * C/2: Vertical shearing. 0 means no shearing at all.
 * D/3: Vertical scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 * <br />
 * xy:
 * Contains the screen coordinates. x:0 y:1
 * <br />
 * x0y0:
 * Origin point. x_0:0 y_0:1
 * <br />
 * sXsY:
 * Scrolling point. sX:0 sY:1
 * <br />
 * Note: the X86 path adds and subtracts the coordinates in 16 bit lanes (paddw/psubw), the
 * generic path does the same in 32 bit integers. Both agree as long as
 * [x,y] + [sX,sY] - [x_0,y_0] fits into a short.
 */
public @nogc int[2] transformFunctionInt(short[2] xy, short[4] ABCD, short[2] x0y0, short[2] sXsY){
	int[2] result;
	version(X86){
		asm @nogc{
			movd	XMM0, xy;		// low dword = [x,y]
			pslldq	XMM0, 4;		// move it one dword up
			movd	XMM2, xy;
			por		XMM0, XMM2;		// words: x, y, x, y
			movd	XMM1, sXsY;		// same duplication for the scroll values
			pslldq	XMM1, 4;
			movd	XMM2, sXsY;
			por		XMM1, XMM2;		// words: sX, sY, sX, sY
			paddw	XMM0, XMM1;		// [x,y] + [sX,sY]
			movd	XMM1, x0y0;		// same duplication for the origin
			pslldq	XMM1, 4;
			movd	XMM2, x0y0;
			por		XMM1, XMM2;		// words: x_0, y_0, x_0, y_0
			psubw	XMM0, XMM1;		// [x,y] + [sX,sY] - [x_0,y_0]
			movq	XMM2, ABCD;		// words: A, B, C, D
			pmaddwd	XMM2, XMM0;		// dword 0 = A*dx + B*dy, dword 1 = C*dx + D*dy
			psrad	XMM2, 8;		// arithmetic shift: divide the 8.8 fixed point products by 256
			movq	result, XMM2;
		}
	}else{
		// Offset of the requested point from the origin, scrolling included.
		const int dX = xy[0] + sXsY[0] - x0y0[0];
		const int dY = xy[1] + sXsY[1] - x0y0[1];
		// Arithmetic shift (>>) to match psrad of the X86 path; a logical shift (>>>) would
		// turn every negative product into a huge positive coordinate.
		result[0] = (ABCD[0] * dX + ABCD[1] * dY) >> 8;
		result[1] = (ABCD[2] * dX + ABCD[3] * dY) >> 8;
	}
	// Move the result back from origin-relative to absolute coordinates.
	result[0] += x0y0[0];
	result[1] += x0y0[1];
	return result;
}
/**
 * Builds the transform points of a clockwise rotation relative to the given input points.
 * <br />
 * theta:
 * Angle of the clockwise rotation in degrees.
 * <br />
 * input:
 * Transform points at 0 degrees. Element 0 and 3 are multiplied by the cosine, element 1 by
 * the sine and element 2 by the negated sine of the angle. Each product is truncated towards
 * zero when it is converted to short.
 */
public @nogc short[4] rotateFunction(double theta, short[4] input = [256,256,256,256]){
	import std.math : PI, cos, sin;
	// Degrees to radians, rounded to double before the trigonometric calls.
	const double radians = theta * (PI / 180);
	const cosine = cos(radians);
	const sine = sin(radians);
	short[4] rotated;
	rotated[3] = cast(short)(input[3] * cosine);
	rotated[2] = cast(short)(-(input[2] * sine));
	rotated[1] = cast(short)(input[1] * sine);
	rotated[0] = cast(short)(input[0] * cosine);
	return rotated;
}
/**
 * Main transform function, returns the point where the pixel is needed to be read from.
 * The function reads as:
 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0]
 * that is
 * x' = A * (x + sX - x_0) + B * (y + sY - y_0) + x_0
 * y' = C * (x + sX - x_0) + D * (y + sY - y_0) + y_0
 * The results are truncated towards zero when converted to int.
 * ABCD:
 * A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 * B/1: Horizontal shearing. 0 means no shearing at all.
 * C/2: Vertical shearing. 0 means no shearing at all.
 * D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 * <br />
 * xy:
 * Contains the screen coordinates. x:0 y:1
 * <br />
 * x0y0:
 * Origin point. x_0:0/2 y_0:1/3. Elements 0 and 1 are paired with A and B, elements 2 and 3
 * with C and D; elements 0 and 1 are the ones added back to the result.
 * <br />
 * sXsY:
 * Scrolling point. sX:0/2 sY:1/3. Elements 0 and 1 are paired with A and B, elements 2 and 3
 * with C and D.
 * <br />
 * Written in plain D, so the same code path is used on every target and no inline assembler
 * or aligned constant is required.
 */
public @nogc int[2] transformFunctionFP(int[2] xy, float[4] ABCD, float[4] x0y0, int[4] sXsY){
	const float x = cast(float)xy[0];
	const float y = cast(float)xy[1];
	// One origin-relative offset per matrix coefficient, so the duplicated (0/2 and 1/3)
	// elements of x0y0 and sXsY are each used with their own coefficient.
	const float offsetA = x + cast(float)sXsY[0] - x0y0[0];
	const float offsetB = y + cast(float)sXsY[1] - x0y0[1];
	const float offsetC = x + cast(float)sXsY[2] - x0y0[2];
	const float offsetD = y + cast(float)sXsY[3] - x0y0[3];
	int[2] result;
	// Row times column, then move back from origin-relative to absolute coordinates.
	result[0] = cast(int)(ABCD[0] * offsetA + ABCD[1] * offsetB + x0y0[0]);
	result[1] = cast(int)(ABCD[2] * offsetC + ABCD[3] * offsetD + x0y0[1]);
	return result;
}
/**
 * Reverse transform function, returns the point where a given texel needs to be written.
 * It is the inverse of the mapping
 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0]
 * and reads as:
 * [x,y] = inverse([A,B,C,D]) * ([x',y'] - [x_0,y_0]) + [x_0,y_0] - [sX,sY]
 * with inverse([A,B,C,D]) = [D,-B,-C,A] / (A * D - B * C).
 * The results are truncated towards zero when converted to int.
 * If the matrix cannot be inverted (A * D - B * C is zero or not a number) both elements of
 * the result are int.min.
 * ABCD:
 * A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 * B/1: Horizontal shearing. 0 means no shearing at all.
 * C/2: Vertical shearing. 0 means no shearing at all.
 * D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image.
 * <br />
 * xy:
 * Contains the transformed (texel) coordinates x' and y'. x':0 y':1
 * <br />
 * x0y0:
 * Origin point. x_0:0 y_0:1. Elements 2 and 3 are not read.
 * <br />
 * sXsY:
 * Scrolling point. sX:0 sY:1. Elements 2 and 3 are not read.
 */
public @nogc int[2] reverseTransformFunctionFP(int[2] xy, float[4] ABCD, int[4] x0y0, float[4] sXsY){
	int[2] result;
	const float determinant = ABCD[0] * ABCD[3] - ABCD[1] * ABCD[2];
	// A singular (or NaN) matrix has no inverse; report it instead of dividing by zero.
	if(determinant == 0 || determinant != determinant){
		result[0] = int.min;
		result[1] = int.min;
		return result;
	}
	const float originX = cast(float)x0y0[0];
	const float originY = cast(float)x0y0[1];
	// Input point relative to the origin.
	const float relX = cast(float)xy[0] - originX;
	const float relY = cast(float)xy[1] - originY;
	// Multiply by the adjugate, divide by the determinant, then undo origin and scrolling.
	result[0] = cast(int)((ABCD[3] * relX - ABCD[1] * relY) / determinant + originX - sXsY[0]);
	result[1] = cast(int)((ABCD[0] * relY - ABCD[2] * relX) / determinant + originY - sXsY[1]);
	return result;
}