/*
 * Copyright (C) 2015-2018, by Laszlo Szeremi under the Boost license.
 *
 * Pixel Perfect Engine, graphics.transformFunctions module
 */

module PixelPerfectEngine.graphics.transformFunctions;

/**
 * Lane mask used as the memory operand of `pand` in this module's inline assembly.
 * Element 0 of the array is the lowest 32 bit lane of the XMM register, so this value keeps
 * lanes 0 and 2 (the "A" and "C" products, as the lane diagrams in the assembly expect) and
 * clears lanes 1 and 3. The previous value [0, uint.max, 0, uint.max] kept lanes 1 and 3 instead.
 * Aligned to 16 bytes because it is read as a 128 bit memory operand.
 * NOTE(review): the symbol has package visibility; confirm no other module relied on the old lane order.
 */
align(16) package static immutable uint[4] maskAC = [uint.max, 0, uint.max, 0];

import PixelPerfectEngine.system.platform;

static if(USE_INTEL_INTRINSICS) import inteli.emmintrin;


/**
 * Main transform function with fixed point arithmetic. Returns the point where the pixel is needed to be read from.
 * 256 equals 1.0.
 * The function reads as:
 * [x',y'] = ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>>8 + [x_0,y_0]
 * ABCD:
 * A/0: Horizontal scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 * B/1: Horizontal shearing. 0 means no shearing at all.
 * C/2: Vertical shearing. 0 means no shearing at all.
 * D/3: Vertical scaling. 256 means no scaling at all, negative values end up in a mirrored image.
 * </ br>
 * xy:
 * Contains the screen coordinates. x:0 y:1
 * </ br>
 * x0y0:
 * Origin point. x_0:0/2 y_0:1/3
 * </ br>
 * sXsY:
 * Scrolling point. 
sX:0/2 sY:1/3 35 */ 36 public @nogc int[2] transformFunctionInt(short[2] xy, short[4] ABCD, short[2] x0y0, short[2] sXsY) pure nothrow @trusted { 37 //public @nogc int[2] transformFunctionInt(short[4] xy, short[4] ABCD, short[4] x0y0, short[4] sXsY){ 38 /+version(X86){ 39 int[2] result; 40 void subfunc() pure nothrow @nogc @system { 41 asm @nogc pure nothrow{ 42 movd XMM0, xy;//load XY values twice 43 pslldq XMM0, 4; 44 movd XMM2, xy; 45 por XMM0, XMM2; 46 movd XMM1, sXsY;//load SxSy values twice 47 pslldq XMM1, 4; 48 movd XMM2, sXsY; 49 por XMM1, XMM2; 50 paddw XMM0, XMM1;//[x,y] + [sX,sY] 51 movd XMM1, x0y0;//load x0y0 values twice 52 pslldq XMM1, 4; 53 movd XMM2, x0y0; 54 por XMM1, XMM2; 55 psubw XMM0, XMM1;//([x,y] + [sX,sY] - [x_0,y_0]) 56 movq XMM2, ABCD;//load ABCD into XMM2 57 pmaddwd XMM2, XMM0;//([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0])) 58 psrad XMM2, 8;//divide by 256 ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>>8 59 movq result, XMM2; 60 } 61 } 62 63 return [result[0] + x0y0[0], result[1] + x0y0[1]]; 64 } else+/ 65 static if (USE_INTEL_INTRINSICS) { 66 __m128i result; 67 short8 xy_, sXsY_, x0y0_, ABCD_; 68 xy_[0] = xy[0]; 69 xy_[1] = xy[1]; 70 xy_[2] = xy[0]; 71 xy_[3] = xy[1]; 72 sXsY_[0] = sXsY[0]; 73 sXsY_[1] = sXsY[1]; 74 sXsY_[2] = sXsY[0]; 75 sXsY_[3] = sXsY[1]; 76 x0y0_[0] = x0y0[0]; 77 x0y0_[1] = x0y0[1]; 78 x0y0_[2] = x0y0[0]; 79 x0y0_[3] = x0y0[1]; 80 ABCD_[0] = ABCD[0]; 81 ABCD_[1] = ABCD[1]; 82 ABCD_[2] = ABCD[2]; 83 ABCD_[3] = ABCD[3]; 84 xy_ += sXsY_; 85 xy_ -= x0y0_; 86 result = _mm_madd_epi16(cast(__m128i)xy_, cast(__m128i)ABCD_); 87 return [result[0] + x0y0[0], result[1] + x0y0[1]]; 88 } else { 89 int[2] result; 90 int[2] nXnY = [xy[0] + sXsY[0] - x0y0[0], xy[1] + sXsY[1] - x0y0[1]]; 91 result[0] = ((ABCD[0] * nXnY[0] + ABCD[1] * nXnY[1])>>>8) + x0y0[0]; 92 result[1] = ((ABCD[2] * nXnY[0] + ABCD[3] * nXnY[1])>>>8) + x0y0[1]; 93 return result; 94 } 95 } 96 /** 97 * Relative rotation clockwise by given degrees. 
Returns the new transform points. 98 * </ br> 99 * theta: 100 * Degrees of clockwise rotation. 101 * </ br> 102 * input: 103 * Input of the transform points at 0 degrees. 104 */ 105 public @nogc short[4] rotateFunction(double theta, short[4] input = [256,256,256,256]){ 106 import std.math; 107 short[4] transformPoints; 108 theta *= PI / 180; 109 transformPoints[0] = cast(short)(input[0] * cos(theta)); 110 transformPoints[1] = cast(short)(input[1] * sin(theta)); 111 transformPoints[2] = cast(short)(input[2] * sin(theta) * -1); 112 transformPoints[3] = cast(short)(input[3] * cos(theta)); 113 return transformPoints; 114 } 115 /** 116 * Main transform function, returns the point where the pixel is needed to be read from. 117 * The function reads as: 118 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0] 119 * ABCD: 120 * A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image. 121 * B/1: Horizontal shearing. 0 means no shearing at all. 122 * C/2: Vertical shearing. 0 means no shearing at all. 123 * D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image. 124 * </ br> 125 * xy: 126 * Contains the screen coordinates. x:0 y:1 127 * </ br> 128 * x0y0: 129 * Origin point. x_0:0/2 y_0:1/3 130 * </ br> 131 * sXsY: 132 * Scrolling point. 
sX:0/2 sY:1/3 133 */ 134 public @nogc int[2] transformFunctionFP(int[2] xy, float[4] ABCD, float[4] x0y0, int[4] sXsY){ 135 version(X86){ 136 int[2] result; 137 asm @nogc{ 138 movq XMM7, xy; 139 cvtdq2ps XMM0, XMM7; // ---- ---- yyyy xxxx 140 movups XMM1, XMM0; // ---- ---- YYYY XXXX 141 pslldq XMM1, 8; // YYYY XXXX ---- ---- 142 por XMM0, XMM1; // YYYY XXXX yyyy xxxx 143 movups XMM7, sXsY; 144 cvtdq2ps XMM1, XMM7; 145 addps XMM0, XMM1; // [x,y] + [sX,sY] 146 movups XMM6, x0y0; 147 subps XMM0, XMM6; // [x,y] + [sX,sY] - [x_0,y_0] 148 movups XMM2, ABCD; // dddd cccc bbbb aaaa 149 mulps XMM2, XMM0; //[A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) 150 movups XMM3, XMM2; // DDDD CCCC BBBB AAAA 151 psrldq XMM3, 4; // ---- DDDD CCCC BBBB 152 pand XMM2, maskAC; // ---- CCCC ---- AAAA 153 pand XMM3, maskAC; // ---- DDDD ---- BBBB 154 addps XMM2, XMM3; // ---- c+d ---- a+b 155 movups XMM3, XMM2; // ---- C+D ---- A+B 156 psrldq XMM3, 4; // ---- ---- C+D ---- 157 por XMM2, XMM3; // ---- c+d C+D A+B 158 addps XMM2, XMM6; // [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0] 159 cvttps2dq XMM7, XMM2; 160 movq result, XMM7; 161 } 162 return result; 163 }else version(X86_64){ 164 int[2] result; 165 asm @nogc{ 166 movq XMM7, xy; 167 cvtdq2ps XMM0, XMM7; // ---- ---- yyyy xxxx 168 movups XMM1, XMM0; // ---- ---- YYYY XXXX 169 pslldq XMM1, 8; // YYYY XXXX ---- ---- 170 por XMM0, XMM1; // YYYY XXXX yyyy xxxx 171 movups XMM7, sXsY; 172 cvtdq2ps XMM1, XMM7; 173 addps XMM0, XMM1; // [x,y] + [sX,sY] 174 movups XMM6, x0y0; 175 subps XMM0, XMM6; // [x,y] + [sX,sY] - [x_0,y_0] 176 movups XMM2, ABCD; // dddd cccc bbbb aaaa 177 mulps XMM2, XMM0; //[A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) 178 movups XMM3, XMM2; // DDDD CCCC BBBB AAAA 179 psrldq XMM3, 4; // ---- DDDD CCCC BBBB 180 pand XMM2, maskAC; // ---- CCCC ---- AAAA 181 pand XMM3, maskAC; // ---- DDDD ---- BBBB 182 addps XMM2, XMM3; // ---- c+d ---- a+b 183 movups XMM3, XMM2; // ---- C+D ---- A+B 184 psrldq XMM3, 4; // ---- ---- C+D ---- 
185 por XMM2, XMM3; // ---- c+d C+D A+B 186 addps XMM2, XMM6; // [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0] 187 cvttps2dq XMM7, XMM2; 188 movq result, XMM7; 189 } 190 return result; 191 }else{ 192 193 } 194 } 195 /** 196 * Reverse transform function, returns the point where a given texel needs to be written. 197 * The function reads as: 198 * [x',y'] = [A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]) + [x_0,y_0] 199 * ABCD: 200 * A/0: Horizontal scaling. 1 means no scaling at all, negative values end up in a mirrored image. 201 * B/1: Horizontal shearing. 0 means no shearing at all. 202 * C/2: Vertical shearing. 0 means no shearing at all. 203 * D/3: Vertical scaling. 1 means no scaling at all, negative values end up in a mirrored image. 204 * </ br> 205 * xy: 206 * Contains the screen coordinates. x:0 y:1 207 * </ br> 208 * x0y0: 209 * Origin point. x_0:0/2 y_0:1/3 210 * </ br> 211 * sXsY: 212 * Scrolling point. sX:0/2 sY:1/3 213 */ 214 public @nogc int[2] reverseTransformFunctionFP(int[2] xy, float[4] ABCD, int[4] x0y0, float[4] sXsY){ 215 version(X86){ 216 int[2] result; 217 asm @nogc{ 218 movq XMM7, xy; 219 cvtdq2ps XMM0, XMM7; // ---- ---- yyyy xxxx 220 movups XMM1, XMM0; // ---- ---- YYYY XXXX 221 pslldq XMM1, 8; // YYYY XXXX ---- ---- 222 por XMM0, XMM1; // YYYY XXXX yyyy xxxx 223 movups XMM7, sXsY; 224 cvtdq2ps XMM1, XMM7; 225 subps XMM0, XMM1; // [x,y] - [sX,sY] 226 movups XMM6, x0y0; 227 addps XMM0, XMM6; // [x,y] - [sX,sY] + [x_0,y_0] 228 movups XMM2, ABCD; // dddd cccc bbbb aaaa 229 divps XMM2, XMM0; //[A,B,C,D] / ([x,y] - [sX,sY] + [x_0,y_0]) 230 movups XMM3, XMM2; // DDDD CCCC BBBB AAAA 231 psrldq XMM3, 4; // ---- DDDD CCCC BBBB 232 pand XMM2, maskAC; // ---- CCCC ---- AAAA 233 pand XMM3, maskAC; // ---- DDDD ---- BBBB 234 addps XMM2, XMM3; // ---- c+d ---- a+b 235 movups XMM3, XMM2; // ---- C+D ---- A+B 236 psrldq XMM3, 4; // ---- ---- C+D ---- 237 por XMM2, XMM3; // ---- c+d C+D A+B 238 subps XMM2, XMM6; // [A,B,C,D] / ([x,y] - [sX,sY] + 
[x_0,y_0]) - [x_0,y_0] 239 cvttps2dq XMM7, XMM2; 240 movq result, XMM7; 241 } 242 return result; 243 }else version(X86_64){ 244 int[2] result; 245 asm @nogc{ 246 movq XMM7, xy; 247 cvtdq2ps XMM0, XMM7; // ---- ---- yyyy xxxx 248 movups XMM1, XMM0; // ---- ---- YYYY XXXX 249 pslldq XMM1, 8; // YYYY XXXX ---- ---- 250 por XMM0, XMM1; // YYYY XXXX yyyy xxxx 251 movups XMM7, sXsY; 252 cvtdq2ps XMM1, XMM7; 253 subps XMM0, XMM1; // [x,y] - [sX,sY] 254 movups XMM6, x0y0; 255 addps XMM0, XMM6; // [x,y] - [sX,sY] + [x_0,y_0] 256 movups XMM2, ABCD; // dddd cccc bbbb aaaa 257 divps XMM2, XMM0; //[A,B,C,D] / ([x,y] - [sX,sY] + [x_0,y_0]) 258 movups XMM3, XMM2; // DDDD CCCC BBBB AAAA 259 psrldq XMM3, 4; // ---- DDDD CCCC BBBB 260 pand XMM2, maskAC; // ---- CCCC ---- AAAA 261 pand XMM3, maskAC; // ---- DDDD ---- BBBB 262 addps XMM2, XMM3; // ---- c+d ---- a+b 263 movups XMM3, XMM2; // ---- C+D ---- A+B 264 psrldq XMM3, 4; // ---- ---- C+D ---- 265 por XMM2, XMM3; // ---- c+d C+D A+B 266 subps XMM2, XMM6; // [A,B,C,D] / ([x,y] - [sX,sY] + [x_0,y_0]) - [x_0,y_0] 267 cvttps2dq XMM7, XMM2; 268 movq result, XMM7; 269 } 270 return result; 271 }else{ 272 273 } 274 }