/*
 * Copyright (C) 2015-2017, by Laszlo Szeremi under the Boost license.
 *
 * Pixel Perfect Engine, graphics.layers module
 */
module PixelPerfectEngine.graphics.layers;

public import PixelPerfectEngine.graphics.bitmap;
public import PixelPerfectEngine.graphics.common;
import std.conv;
import std.stdio;
import std.parallelism;
//import system.etc;
import PixelPerfectEngine.system.exc;
import std.algorithm;
import derelict.sdl2.sdl;
//import std.range;

// Packed 16 bit constants for the inline assembly alpha blenders:
// they provide the 256 and the 1 of the (256 - alpha) and (1 + alpha) terms.
static immutable ushort[4] alphaMMXmul_const256 = [256,256,256,256];
static immutable ushort[4] alphaMMXmul_const1 = [1,1,1,1];
static immutable ushort[8] alphaSSEConst256 = [256,256,256,256,256,256,256,256];
static immutable ushort[8] alphaSSEConst1 = [1,1,1,1,1,1,1,1];
// 128 bit value with every bit set.
static immutable uint[4] SSEUQWmaxvalue = [0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF];

/**
 * Flip state of a sprite. X and Y are single bit flags, XY is the combination of both.
 */
public enum FlipRegister : ubyte {
	NORM	=	0x00,
	X		=	0x01,
	Y		=	0x02,
	XY		=	0x03
}

/**
 * Common base of the layers: stores the scroll position and the raster size,
 * and declares the rendering entry point.
 */
abstract class Layer {
	// Scroll position (sX, sY) and size of the raster (rasterX, rasterY), both in pixels.
	private int sX, sY, rasterX, rasterY;
	// The transparency index is deprecated. Set color 0 as transparent instead.

	/// Sets the size of the raster the layer renders to. rX , rY : Width and height in pixels.
	public void setRasterizer(int rX, int rY){
		rasterX = rX;
		rasterY = rY;
	}
	/// Absolute scrolling.
	public void scroll(int x, int y){
		sX = x;
		sY = y;
	}
	/// Relative scrolling. Positive values scroll the layer left and up, negative values scroll the layer down and right.
	public void relScroll(int x, int y){
		sX += x;
		sY += y;
	}
	/// Returns the horizontal scroll position.
	public int getSX(){
		return sX;
	}
	/// Returns the vertical scroll position.
	public int getSY(){
		return sY;
	}
	/**
	 * Override this to enable output to the raster.
	 * workpad : Destination buffer. The layers of this module address it as 4 bytes per pixel.
	 * pitch : Length of one line of the workpad in bytes.
	 * palette : Color look up table, 4 bytes per color. The layers of this module read byte 0 as the alpha value.
	 * threads : Not used by the implementations visible in this module.
	 */
	public abstract void updateRaster(void* workpad, int pitch, ubyte[] palette, int[] threads);
}

/**
 * Size information of a tile layer, as returned by ITileLayer.getLayerInfo().
 * tileX , tileY : Size of a single tile. mX , mY : Size of the mapping in tiles.
 */
public struct BLInfo{
	public int tileX, tileY, mX, mY;
	this(int tileX1, int tileY1, int x1, int y1){
		tileX = tileX1;
		tileY = tileY1;
		mX = x1;
		mY = y1;
	}
}
/**
 * Sets the rendering mode of the TileLayer.
 *
 * COPY is the fastest, but overrides any kind of transparency keying. It directly writes into the framebuffer. Should only be used for certain applications, like bottom layers.
 * BLITTER uses a custom BitBlT algorithm for the SSE2 instruction set. Automatically generates the copying mask depending on the alpha-value. Any alpha-value that's non-zero will cause a non-transparent pixel, and all zeros are completely transparent. Gradual transparency is not available.
 * ALPHA_BLENDING uses SSE2 for alpha blending. The slowest, but allows gradual transparencies.
 */
public enum TileLayerRenderingMode{
	COPY,
	BLITTER,
	ALPHA_BLENDING
}
/*
 * Used by the background-sprite tester.
 */
public interface ITileLayer{
	public BLInfo getLayerInfo();
	public Bitmap16Bit getTile(wchar id);
	public wchar[] getMapping();
}
/**
 * General purpose TileLayer with palette support, mainly for backgrounds.
 * Use multiple instances of this class for parallax scrolling.
 */
public class TileLayer : Layer, ITileLayer{
	private int tileX, tileY, mX, mY;		// tile size in pixels, mapping size in tiles
	private int totalX, totalY;				// mapping size in pixels (mX * tileX, mY * tileY)
	private wchar[] mapping;				// tile IDs, row by row, mX entries per row
	private TileLayerRenderingMode renderMode;
	private Bitmap16Bit[wchar] tileSet;
	private bool wrapMode;
	/// Constructor. tX , tY : Set the size of the tiles on the layer.
	this(int tX, int tY, TileLayerRenderingMode renderMode = TileLayerRenderingMode.ALPHA_BLENDING){
		tileX = tX;
		tileY = tY;
		this.renderMode = renderMode;
	}
	/// Wrapmode: if enabled, the layer will be turned into an "infinite" mode.
	public void setWrapMode(bool w){
		wrapMode = w;
	}
	/**
	 * Gets the ID of the given element from the mapping. x , y : Position in tiles.
	 * Returns 0xFFFF (the "no tile" value also used by tileByPixel) if the position is outside of the mapping,
	 * instead of reading an element of a neighboring row or running off the array.
	 */
	public wchar readMapping(int x, int y){
		if(x < 0 || y < 0 || x >= mX || y >= mY){
			return 0xFFFF;
		}
		immutable int index = x + (mX * y);
		if(index >= mapping.length){	// the loaded array may be shorter than mX * mY
			return 0xFFFF;
		}
		return mapping[index];
	}
	/// Writes to the map. x , y : Position in tiles. w : ID of the tile.
	public void writeMapping(int x, int y, wchar w){
		mapping[x + (mX * y)] = w;
	}
	/// Loads a mapping from an array. x , y : Sizes of the mapping in tiles. map : an array representing the elements of the map.
	/// x*y=map.length
	public void loadMapping(int x, int y, wchar[] map){
		mX = x;
		mY = y;
		mapping = map;
		totalX = mX * tileX;
		totalY = mY * tileY;
	}
	/// Adds a tile to the tileSet. t : The tile. id : The ID in wchar to differentiate between different tiles.
	/// Throws a TileFormatException if the size of the tile differs from the tile size of the layer.
	public void addTile(Bitmap16Bit t, wchar id){
		if(t.getX() != tileX || t.getY() != tileY){
			throw new TileFormatException("Incorrect tile size!", __FILE__, __LINE__, null);
		}
		tileSet[id] = t;
	}
	//Removes the tile with the ID from the set.
180 public void removeTile(wchar id){ 181 tileSet.remove(id); 182 } 183 184 public wchar tileByPixel(int x, int y){ 185 if(x/tileX + (y/tileY)*mX < 0 || x/tileX + (y/tileY)*mX >= mapping.length) return 0xFFFF; 186 return mapping[x/tileX + (y/tileY)*mX]; 187 } 188 189 public override void updateRaster(void* workpad, int pitch, ubyte[] palette, int[] threads){ 190 191 if((sX + rasterX <= 0 || sX > totalX) && !wrapMode) return; 192 switch(renderMode){ 193 case TileLayerRenderingMode.ALPHA_BLENDING: 194 int y = sY < 0 ? sY * -1 : 0; 195 //int yBegin = sY < 0 ? sY * -1 : 0; 196 /*if(wrapMode){ 197 y = sX + 0x7FFFFFFF; 198 }else{ 199 y = sX < 0 ? 0 : sX; 200 }*/ 201 for( ; y < rasterY ; y++){ 202 //writeln(y); 203 //if((sY + y >= totalY) && !wrapMode) break; 204 //if(y + sY >= 0){ 205 int offsetP = y*pitch; // The offset of the line that is being written 206 int offsetY = tileY * ((y + sY)%tileY); 207 int offsetX = sX%tileX; 208 //int outscrollX = sX<0 ? sX*-1 : 0; 209 //int tnXreg = (sX-(sX%tileX))/tileX; 210 //int tnXC = tnXreg + (rasterX/tileX); 211 //bool finish; 212 //writeln(offsetY); 213 //while(!finish){ 214 int x = sX < 0 ? sX * -1 : 0; 215 int targetX = totalX - sX > rasterX ? rasterX : rasterX - (totalX - sX); 216 void *p0 = (workpad + (x*4) + offsetP); 217 while(x < targetX){ 218 //writeln(tnXreg+(mX*((y+sY-((y+sY)%tileY))/tileY))); 219 //ushort[] chunk = tileSet[mapping[tnXreg+(mX*((y+sY-((y+sY)%tileY))/tileY))]].readRow((y+sY)%tileY); 220 221 //ushort *c = tileSet[mapping[tnXreg+(mX*((y+sY-((y+sY)%tileY))/tileY))]].getPtr(); 222 223 wchar currentTile = tileByPixel(x+sX,y+sY); 224 if(currentTile != 0xFFFF){ // skip if tile is null 225 //writeln(currentTile); 226 int tileXtarget = x + tileX < rasterX ? tileX : tileX - ((x + tileX) - rasterX); // 227 //if(tileXtarget + x > ){} 228 int xp = (offsetX != 0 && x == 0) ? 
offsetX : 0; // 229 ushort *c = tileSet[currentTile].getPtr(); // pointer to the current tile's pixeldata 230 c += offsetY; 231 c += xp; 232 //int foo = (tnXreg*tileX); 233 for(; xp < tileXtarget-3; xp+=4){ 234 235 ubyte[16] *p = cast(ubyte[16]*)p0; 236 //writeln(p,',',x,',',y,',',xp); 237 ubyte[16] src; 238 //writeln(*c); 239 *cast(ubyte[4]*)(src.ptr) = *cast(ubyte[4]*)(palette.ptr + (4 * *c)); 240 c++; 241 *cast(ubyte[4]*)(src.ptr + 4) = *cast(ubyte[4]*)(palette.ptr + (4 * *c)); 242 c++; 243 *cast(ubyte[4]*)(src.ptr + 8) = *cast(ubyte[4]*)(palette.ptr + (4 * *c)); 244 c++; 245 *cast(ubyte[4]*)(src.ptr + 12) = *cast(ubyte[4]*)(palette.ptr + (4 * *c)); 246 c++; 247 ubyte[16] alpha = [src[0],src[0],src[0],src[0],src[4],src[4],src[4],src[4],src[8],src[8],src[8],src[8],src[12],src[12],src[12],src[12]]; 248 //uint[4] alpha; 249 //writeln(src); 250 asm{ 251 //calculating alpha 252 //pxor XMM1, XMM1; 253 movups XMM0, alpha; 254 255 movups XMM1, XMM0; 256 punpcklbw XMM0, XMM2; 257 punpckhbw XMM1, XMM2; 258 movaps XMM6, alphaSSEConst256; 259 movaps XMM7, XMM6; 260 movaps XMM4, alphaSSEConst1; 261 movaps XMM5, XMM4; 262 263 264 //punpcklbw XMM1, XMM2; 265 266 paddusw XMM4, XMM0; //1 + alpha01 267 paddusw XMM5, XMM1; 268 psubusw XMM6, XMM0; //256 - alpha01 269 psubusw XMM7, XMM1; 270 271 //moving the values to their destinations 272 mov EBX, p[EBP]; 273 movups XMM0, src; //src01 274 movups XMM1, XMM0; //src23 275 punpcklbw XMM0, XMM2; 276 punpckhbw XMM1, XMM2; 277 pmullw XMM4, XMM0; //src01 * (1 + alpha01) 278 pmullw XMM5, XMM1; //src23 * (1 + alpha23) 279 movups XMM0, [EBX]; //dest01 280 movups XMM1, XMM0; //dest23 281 punpcklbw XMM0, XMM2; 282 punpckhbw XMM1, XMM2; 283 pmullw XMM6, XMM0; //dest01 * (256 - alpha) 284 pmullw XMM7, XMM1; //dest23 * (256 - alpha) 285 286 paddusw XMM4, XMM6; //(src01 * (1 + alpha01)) + (dest01 * (256 - alpha01)) 287 paddusw XMM5, XMM7; //(src * (1 + alpha)) + (dest * (256 - alpha)) 288 psrlw XMM4, 8; //(src * (1 + alpha)) + (dest * (256 - 
alpha)) / 256 289 psrlw XMM5, 8; 290 //moving the result to its place; 291 //pxor MM2, MM2; 292 packuswb XMM4, XMM5; 293 294 movups [EBX], XMM4; 295 296 //emms; 297 } 298 //writeln(*p); 299 x+=4; 300 p0+=16; 301 } 302 for(; xp < tileXtarget; xp++){ 303 ubyte[4] *p = cast(ubyte[4]*)p0; 304 ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 305 c++; 306 ushort[4] alpha = [src[0],src[0],src[0],src[0]]; 307 asm{ 308 pxor XMM3, XMM3; 309 movq XMM2, alpha; 310 mov EBX, p[EBP]; 311 movd XMM0, [EBX]; 312 movd XMM1, src; 313 punpcklbw XMM0, XMM3;//dest 314 punpcklbw XMM1, XMM3;//src 315 //punpcklbw XMM2, XMM3;//alpha 316 movaps XMM4, alphaSSEConst256; 317 movaps XMM5, alphaSSEConst1; 318 319 paddusw XMM5, XMM2;//1+alpha 320 psubusw XMM4, XMM2;//256-alpha 321 322 pmullw XMM0, XMM4;//dest*(256-alpha) 323 pmullw XMM1, XMM5;//src*(1+alpha) 324 paddusw XMM0, XMM1;//(src*(1+alpha))+(dest*(256-alpha)) 325 psrlw XMM0, 8;//(src*(1+alpha))+(dest*(256-alpha))/256 326 //pxor XMM7, XMM7; 327 packuswb XMM0, XMM3; 328 329 movd [EBX], XMM0; 330 331 //pxor XMM0, XMM0; 332 //pxor XMM1, XMM1; 333 pxor XMM2, XMM2; 334 } 335 x++; 336 p0+=4; 337 } 338 /*ushort c = chunk[x]; 339 alphaBlend(palette[(c*4)+1],palette[(c*4)+2],palette[(c*4)+3],palette[(c*4)], workpad + ((tnXreg*tileX)+x-sX)*4 + y*pitch);*/ 340 341 342 }else{ 343 x+=tileX; 344 } 345 } 346 347 }break; 348 case TileLayerRenderingMode.BLITTER: 349 int y = sY < 0 ? sY * -1 : 0; 350 351 for( ; y < rasterY ; y++){ 352 353 int offsetP = y*pitch; // The offset of the line that is being written 354 int offsetY = tileY * ((y + sY)%tileY); 355 int offsetX = sX%tileX; 356 357 int x = sX < 0 ? sX * -1 : 0; 358 int targetX = totalX - sX > rasterX ? rasterX : rasterX - (totalX - sX); 359 void *p0 = (workpad + (x*4) + offsetP); 360 while(x < targetX){ 361 362 wchar currentTile = tileByPixel(x+sX,y+sY); 363 if(currentTile != 0xFFFF){ 364 int tileXtarget = x + tileX < rasterX ? 
tileX : tileX - ((x + tileX) - rasterX); // 365 366 //int xp; // 367 int xp = (offsetX != 0 && x == 0) ? offsetX : 0; // 368 ushort *c = tileSet[currentTile].getPtr(); // pointer to the current tile's pixeldata 369 c += offsetY; 370 c += xp; 371 //int foo = (tnXreg*tileX); 372 for(; xp < tileXtarget-3; xp+=4){ 373 374 ubyte[16] *p = cast(ubyte[16]*)p0; 375 ubyte[16] src; 376 *cast(ubyte[4]*)(src.ptr) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 377 c++; 378 *cast(ubyte[4]*)(src.ptr + 4) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 379 c++; 380 *cast(ubyte[4]*)(src.ptr + 8) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 381 c++; 382 *cast(ubyte[4]*)(src.ptr + 12) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 383 c++; 384 ubyte[16] alpha = [src[0],src[0],src[0],src[0],src[4],src[4],src[4],src[4],src[8],src[8],src[8],src[8],src[12],src[12],src[12],src[12]]; 385 386 asm{ 387 //generating copying mask 388 pxor XMM1, XMM1; 389 movups XMM0, alpha; 390 pcmpgtd XMM0, XMM1; 391 392 mov EBX, p[EBP]; 393 movups XMM2, src; 394 movups XMM3, [EBX]; 395 //the blitter algorithm 396 pand XMM3, XMM0; 397 por XMM3, XMM2; 398 //writeback 399 movups [EBX], XMM3; 400 401 } 402 x+=4; 403 p0+=16; 404 } 405 for(; xp < tileXtarget; xp++){ 406 ubyte[4] *p = cast(ubyte[4]*)p0; 407 ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 408 c++; 409 ubyte[4] alpha = [src[0],src[0],src[0],src[0]]; 410 asm{ 411 //generating copying mask 412 pxor XMM1, XMM1; 413 movd XMM0, alpha; 414 pcmpgtd XMM0, XMM1; 415 416 mov EBX, p[EBP]; 417 movd XMM2, src; 418 movd XMM3, [EBX]; 419 //the blitter algorithm 420 pand XMM3, XMM0; 421 por XMM3, XMM2; 422 //writeback 423 movd [EBX], XMM3; 424 425 } 426 x++; 427 p0+=4; 428 } 429 430 }else{ 431 x+=tileX; 432 } 433 } 434 435 } 436 break; 437 default: 438 int y = sY < 0 ? sY * -1 : 0; 439 440 for( ; y < rasterY ; y++){ 441 442 int offsetP = y*pitch; // The offset of the line that is being written 443 int offsetY = tileY * (y - sY)%tileY; 444 445 int x = sX < 0 ? 
sX * -1 : 0; 446 int targetX = totalX - sX > rasterX ? rasterX : rasterX - (totalX - sX); 447 void *p0 = (workpad + (x*4) + offsetP); 448 while(x < targetX){ 449 450 wchar currentTile = tileByPixel(x+sX,y+sY); 451 if(currentTile != 0x0000){ 452 int tileXtarget = x + tileX < rasterX ? tileX : tileX - ((x + tileX) - rasterX); // 453 454 int xp; // 455 ushort *c = tileSet[currentTile].getPtr(); // pointer to the current tile's pixeldata 456 c += offsetY; 457 //int foo = (tnXreg*tileX); 458 for(; xp < tileXtarget-3; xp+=4){ 459 460 ubyte[16] *p = cast(ubyte[16]*)p0; 461 ubyte[16] src; 462 *cast(ubyte[4]*)(src.ptr) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 463 c++; 464 *cast(ubyte[4]*)(src.ptr + 4) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 465 c++; 466 *cast(ubyte[4]*)(src.ptr + 8) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 467 c++; 468 *cast(ubyte[4]*)(src.ptr + 12) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 469 c++; 470 //ubyte[16] alpha = [src[0],src[0],src[0],src[0],src[4],src[4],src[4],src[4],src[8],src[8],src[8],src[8],src[12],src[12],src[12],src[12]]; 471 472 asm{ 473 474 mov EBX, p[EBP]; 475 movups XMM2, src; 476 //writeback 477 movups [EBX], XMM2; 478 479 } 480 x+=4; 481 p0+=16; 482 } 483 for(; xp < tileXtarget; xp++){ 484 ubyte[4] *p = cast(ubyte[4]*)p0; 485 ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 486 487 c++; 488 //ubyte[4] alpha = [src[0],src[0],src[0],src[0]]; 489 asm{ 490 491 mov EBX, p[EBP]; 492 movd XMM2, src; 493 //writeback 494 movd [EBX], XMM2; 495 496 } 497 x++; 498 p0+=4; 499 } 500 501 }else{ 502 x+=tileX; 503 } 504 } 505 506 } 507 break; 508 } 509 } 510 511 public void updateRaster(Bitmap16Bit frameBuffer){ 512 if(sX + rasterX <= 0 || sX > totalX) return; 513 for(int y ; y < rasterY ; y++){ 514 if(sY + y >= totalY) break; 515 if(y + sY >= 0){ 516 517 //int outscrollX = sX<0 ? sX*-1 : 0; 518 int tnXreg = sX>0 ? 
(sX-(sX%tileX))/tileX : 0; 519 //int tnXC = tnXreg + (rasterX/tileX); 520 bool finish; 521 while(!finish){ 522 //writeln(tnXreg+(mX*((y+sY-((y+sY)%tileY))/tileY))); 523 ushort[] chunk = tileSet[mapping[tnXreg+(mX*((y+sY-((y+sY)%tileY))/tileY))]].readRow((y+sY)%tileY); 524 for(int x; x <tileX; x++){ 525 526 if((tnXreg*tileX)+x-sX >= 0 && (tnXreg*tileX)+x-sX < rasterX){ 527 frameBuffer.writePixel((tnXreg*tileX)+x-sX,y,chunk[x]); 528 }else if((tnXreg*tileX)+x-sX >= rasterX){ 529 finish = true; 530 } 531 } 532 tnXreg++; 533 if(tnXreg == mX){ finish = true;} 534 } 535 } 536 } 537 538 } 539 540 public BLInfo getLayerInfo(){ 541 return BLInfo(tileX,tileY,mX,mY); 542 } 543 public Bitmap16Bit getTile(wchar id){ 544 return tileSet[id]; 545 } 546 public wchar[] getMapping(){ 547 return mapping; 548 } 549 } 550 /* 551 *Used by the collision detectors 552 */ 553 public interface ISpriteCollision{ 554 //public Bitmap16Bit[int] getSpriteSet(); 555 public Coordinate[int] getCoordinates(); 556 public FlipRegister[int] getFlipRegisters(); 557 public int[int] getSpriteSorter(); 558 //public ushort getTransparencyIndex(); 559 } 560 561 public interface ISpriteLayer{ 562 //public void addSprite(Bitmap16Bit s, int n, Coordinate c); 563 //public void addSprite(Bitmap16Bit s, int n, int x, int y); 564 public void removeSprite(int n); 565 public void moveSprite(int n, int x, int y); 566 public void relMoveSprite(int n, int x, int y); 567 } 568 public interface ISpriteLayer16Bit : ISpriteLayer{ 569 public void addSprite(Bitmap16Bit s, int n, Coordinate c); 570 public void addSprite(Bitmap16Bit s, int n, int x, int y); 571 public void replaceSprite(Bitmap16Bit s, int n); 572 public void replaceSprite(Bitmap16Bit s, int n, int x, int y); 573 public void replaceSprite(Bitmap16Bit s, int n, Coordinate c); 574 } 575 public interface ISpriteLayer32Bit : ISpriteLayer{ 576 public void addSprite(Bitmap32Bit s, int n, Coordinate c); 577 public void addSprite(Bitmap32Bit s, int n, int x, int y); 578 
public void replaceSprite(Bitmap32Bit s, int n); 579 public void replaceSprite(Bitmap32Bit s, int n, int x, int y); 580 public void replaceSprite(Bitmap32Bit s, int n, Coordinate c); 581 } 582 /* 583 *Use it to call the collision detector 584 */ 585 public interface SpriteMovementListener{ 586 void spriteMoved(int ID); 587 } 588 /** 589 *Sprite controller and renderer. 590 */ 591 public class SpriteLayer : Layer, ISpriteCollision, ISpriteLayer16Bit{ 592 private Bitmap16Bit[int] spriteSet; 593 private Coordinate[int] coordinates; //Use moveSprite() and relMoveSprite() instead to move sprites 594 private FlipRegister[int] flipRegisters; 595 private int[] spriteSorter; 596 public SpriteMovementListener[int] collisionDetector; 597 //Constructors. 598 /*public this(int n){ 599 spriteSet.length = n; 600 coordinates.length = n; 601 flipRegisters.length = n; 602 }*/ 603 604 public this(){ 605 606 } 607 608 public void addSprite(Bitmap16Bit s, int n, Coordinate c){ 609 spriteSet[n] = s; 610 coordinates[n] = c; 611 flipRegisters[n] = FlipRegister.NORM; 612 spriteSorter ~= n; 613 //sortSprites(); 614 spriteSorter.sort(); 615 616 } 617 618 public void addSprite(Bitmap16Bit s, int n, int x, int y){ 619 spriteSet[n] = s; 620 coordinates[n] = Coordinate(x,y,x+s.getX(),y+s.getY()); 621 flipRegisters[n] = FlipRegister.NORM; 622 //spriteSorter[n] = n; 623 spriteSorter ~= n; 624 //sortSprites(); 625 626 spriteSorter.sort(); 627 628 } 629 /** 630 * 631 */ 632 public void replaceSprite(Bitmap16Bit s, int n){ 633 634 if(!(s.getX == spriteSet[n].getX && s.getY == spriteSet[n].getY)){ 635 coordinates[n] = Coordinate(coordinates[n].left,coordinates[n].top,coordinates[n].left + s.getX,coordinates[n].top + s.getY); 636 } 637 spriteSet[n] = s; 638 } 639 640 public void replaceSprite(Bitmap16Bit s, int n, int x, int y){ 641 spriteSet[n] = s; 642 coordinates[n] = Coordinate(x,y,x+s.getX(),y+s.getY()); 643 } 644 645 public void replaceSprite(Bitmap16Bit s, int n, Coordinate c){ 646 spriteSet[n] 
= s; 647 coordinates[n] = c; 648 } 649 650 /*public ushort getTransparencyIndex(){ 651 return transparencyIndex; 652 }*/ 653 654 public void removeSprite(int n){ 655 //spriteSorter.remove(n); 656 coordinates.remove(n); 657 flipRegisters.remove(n); 658 spriteSet.remove(n); 659 int[] newSpriteSorter; 660 for(int i; i < spriteSorter.length; i++){ 661 //writeln(0); 662 if(spriteSorter[i] != n){ 663 newSpriteSorter ~= spriteSorter[i]; 664 665 } 666 } 667 spriteSorter = newSpriteSorter; 668 //writeln(spriteSorter); 669 //sortSprites(); 670 } 671 public void moveSprite(int n, int x, int y){ 672 coordinates[n].move(x,y); 673 callCollisionDetector(n); 674 } 675 public void relMoveSprite(int n, int x, int y){ 676 coordinates[n].relMove(x,y); 677 callCollisionDetector(n); 678 } 679 680 public Bitmap16Bit[int] getSpriteSet(){ 681 return spriteSet; 682 } 683 684 public Coordinate[int] getCoordinates(){ 685 return coordinates; 686 } 687 688 public FlipRegister[int] getFlipRegisters(){ 689 return flipRegisters; 690 } 691 public int[int] getSpriteSorter(){ 692 return null; 693 } 694 695 private void callCollisionDetector(int n){ 696 foreach(c; collisionDetector){ 697 c.spriteMoved(n); 698 } 699 } 700 701 public override void updateRaster(void* workpad, int pitch, ubyte[] palette, int[] threads){ 702 foreach_reverse(int i ; spriteSorter){ 703 /*foreach(int i ; spriteSet.byKey){*/ 704 if((coordinates[i].right > sX && coordinates[i].bottom > sY) && (coordinates[i].left < sX + rasterX && coordinates[i].top < sY + rasterY)) { 705 //writeln(i); 706 int offsetXA, offsetXB, offsetYA, offsetYB, sizeX = coordinates[i].getXSize(), offsetX = coordinates[i].left - sX; 707 if(sX > coordinates[i].left) {offsetXA = sX - coordinates[i].left; } 708 if(sY > coordinates[i].top) {offsetYA = sY - coordinates[i].top; } 709 if(sX + rasterX < coordinates[i].right) {offsetXB = coordinates[i].right - rasterX; } 710 if(sY + rasterY < coordinates[i].bottom) {offsetYB = coordinates[i].bottom - rasterY; } 711 
ushort* p0 = spriteSet[i].getPtr(); 712 for(int y = offsetYA ; y < coordinates[i].getYSize() - offsetYB ; y++){ 713 //ushort[] chunk = (flipRegisters[i] == FlipRegister.Y || flipRegisters[i] == FlipRegister.XY) ? spriteSet[i].readRowReverse(y) : spriteSet[i].readRow(y); 714 int offsetP = sizeX * y, offsetY = (coordinates[i].top - sY + y)*pitch; 715 int x = offsetXA; 716 //if(x < 0) writeln(x); 717 if(flipRegisters[i] == FlipRegister.X || flipRegisters[i] == FlipRegister.XY){ 718 for(; x < sizeX - offsetXB ; x+=4){ 719 ushort* c = (p0 + (sizeX - x - 1) + offsetP); 720 721 //ushort c = chunk[chunk.length-x-1]; 722 //alphaBlend(palette[(c*4)+1],palette[(c*4)+2],palette[(c*4)+3],palette[(c*4)], workpad + (coordinates[i].xa - sX + x)*4 + (coordinates[i].ya - sY + y)*pitch); 723 //alphaBlend(*cast(ubyte[4]*)(palette.ptr + 4 * c), workpad + (coordinates[i].xa - sX + x)*4 + (coordinates[i].ya - sY + y)*pitch); 724 //ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 725 //ubyte[4] *p = cast(ubyte[4]*)(workpad + (offsetX + x)*4 + offsetY); 726 /*if(src[0] == 255){ 727 *p = src; 728 } 729 else if(src[0] != 0){ 730 ubyte[4] dest2 = *p; 731 dest2[1] = to!ubyte((src[1] * src[0] + dest2[1] * (255 - src[0]))>>8); 732 dest2[2] = to!ubyte((src[2] * src[0] + dest2[2] * (255 - src[0]))>>8); 733 dest2[3] = to!ubyte((src[3] * src[0] + dest2[3] * (255 - src[0]))>>8); 734 *p = dest2; 735 }*/ 736 ubyte[16] *p = cast(ubyte[16]*)(workpad + (offsetX + x)*4 + offsetY); 737 ubyte[16] src; 738 //uint[4] src; 739 *cast(ubyte[4]*)(src.ptr) = *cast(ubyte[4]*)(palette.ptr + 4 * *(c+3)); 740 *cast(ubyte[4]*)(src.ptr + 4) = *cast(ubyte[4]*)(palette.ptr + 4 * *(c+2)); 741 *cast(ubyte[4]*)(src.ptr + 8) = *cast(ubyte[4]*)(palette.ptr + 4 * *(c+1)); 742 *cast(ubyte[4]*)(src.ptr + 12) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 743 ubyte[16] alpha = [src[0],src[0],src[0],src[0],src[4],src[4],src[4],src[4],src[8],src[8],src[8],src[8],src[12],src[12],src[12],src[12]]; 744 745 746 asm{ 747 //calculating 
alpha 748 //pxor XMM1, XMM1; 749 movups XMM0, alpha; 750 751 movups XMM1, XMM0; 752 punpcklbw XMM0, XMM2; 753 punpckhbw XMM1, XMM3; 754 movaps XMM6, alphaSSEConst256; 755 movaps XMM7, XMM6; 756 movaps XMM4, alphaSSEConst1; 757 movaps XMM5, XMM4; 758 759 760 //punpcklbw XMM1, XMM2; 761 762 paddusw XMM4, XMM1; //1 + alpha01 763 paddusw XMM5, XMM0; 764 psubusw XMM6, XMM1; //256 - alpha01 765 psubusw XMM7, XMM0; 766 767 //moving the values to their destinations 768 mov EBX, p[EBP]; 769 movups XMM0, src; //src01 770 movups XMM1, XMM0; //src23 771 punpcklbw XMM0, XMM2; 772 punpckhbw XMM1, XMM3; 773 pmullw XMM4, XMM0; //src01 * (1 + alpha01) 774 pmullw XMM5, XMM1; //src23 * (1 + alpha23) 775 movups XMM0, [EBX]; //dest01 776 movups XMM1, XMM0; //dest23 777 punpcklbw XMM0, XMM2; 778 punpckhbw XMM1, XMM3; 779 pmullw XMM6, XMM0; //dest01 * (256 - alpha) 780 pmullw XMM7, XMM1; //dest23 * (256 - alpha) 781 782 paddusw XMM4, XMM6; //(src01 * (1 + alpha01)) + (dest01 * (256 - alpha01)) 783 paddusw XMM5, XMM7; //(src * (1 + alpha)) + (dest * (256 - alpha)) 784 psrlw XMM4, 8; //(src * (1 + alpha)) + (dest * (256 - alpha)) / 256 785 psrlw XMM5, 8; 786 //moving the result to its place; 787 //pxor MM2, MM2; 788 packuswb XMM4, XMM5; 789 790 movups [EBX], XMM4; 791 792 //emms; 793 } 794 } 795 for(; x < sizeX - offsetXB ; x++){ 796 ushort* c = (p0 + (sizeX - x - 1) + offsetP); 797 798 ubyte[4] *p = cast(ubyte[4]*)(workpad + (offsetX + x)*4 + offsetY); 799 ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 800 ushort[4] alpha = [src[0],src[0],src[0],src[0]]; 801 asm{ 802 pxor XMM3, XMM3; 803 movq XMM2, alpha; 804 mov EBX, p[EBP]; 805 movd XMM0, [EBX]; 806 movd XMM1, src; 807 punpcklbw XMM0, XMM3;//dest 808 punpcklbw XMM1, XMM3;//src 809 //punpcklbw XMM2, XMM3;//alpha 810 movaps XMM4, alphaSSEConst256; 811 movaps XMM5, alphaSSEConst1; 812 813 paddusw XMM5, XMM2;//1+alpha 814 psubusw XMM4, XMM2;//256-alpha 815 816 pmullw XMM0, XMM4;//dest*(256-alpha) 817 pmullw XMM1, 
XMM5;//src*(1+alpha) 818 paddusw XMM0, XMM1;//(src*(1+alpha))+(dest*(256-alpha)) 819 psrlw XMM0, 8;//(src*(1+alpha))+(dest*(256-alpha))/256 820 //pxor XMM7, XMM7; 821 packuswb XMM0, XMM3; 822 823 movd [EBX], XMM0; 824 825 //pxor XMM0, XMM0; 826 //pxor XMM1, XMM1; 827 pxor XMM2, XMM2; 828 } 829 830 } 831 } 832 else{ //for non flipped sprites 833 void* pl = (workpad + (offsetX + x)*4 + offsetY); 834 ushort* c = p0 + x + offsetP; 835 for(; x < sizeX - offsetXB - 3 ; x+=4){ 836 //ushort* c = p0 + x + offsetP; 837 ubyte[16] *p = cast(ubyte[16]*)pl; //(workpad + (offsetX + x)*4 + offsetY); 838 ubyte[16] src; 839 *cast(ubyte[4]*)(src.ptr) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 840 c++; 841 *cast(ubyte[4]*)(src.ptr + 4) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 842 c++; 843 *cast(ubyte[4]*)(src.ptr + 8) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 844 c++; 845 *cast(ubyte[4]*)(src.ptr + 12) = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 846 c++; 847 ubyte[16] alpha = [src[12],src[12],src[12],src[12],src[8],src[8],src[8],src[8],src[4],src[4],src[4],src[4],src[0],src[0],src[0],src[0]]; 848 849 //uint[4] src; 850 //uint[4] alpha; 851 852 asm{ 853 //do a test if alpha-blending and/or blitter can avoided 854 /* 855 movups XMM0, alpha; 856 pxor XMM1, XMM1; 857 pcmpeqq XMM1, XMM0; //use packed testing of SSE to figure out if any operation can be skipped 858 je endofalgorithm; 859 movaps XMM3, SSEUQWmaxvalue; //use further tests if blitter can be used 860 pcmpeqq XMM3, XMM0; 861 pand XMM3, XMM1; 862 pcmpeqq XMM3, SSEUQWmaxvalue; 863 jne alphablend; 864 865 //blitter routine 866 mov EBX, p[EBP]; 867 movups XMM0, src; 868 movups XMM1, [EBX]; 869 pxor XMM3, XMM3; 870 pcmpeqq XMM3, XMM0; 871 pand XMM1, XMM3; 872 por XMM1, XMM0; 873 movups [EBX], XMM1; 874 jmp endofalgorithm; 875 876 alphablend:*/ 877 //calculating alpha 878 //pxor XMM1, XMM1; 879 880 movups XMM0, alpha; 881 movups XMM1, XMM0; 882 punpcklbw XMM0, XMM2; 883 punpckhbw XMM1, XMM2; 884 movaps XMM6, alphaSSEConst256; 885 movaps 
XMM7, XMM6; 886 movaps XMM4, alphaSSEConst1; 887 movaps XMM5, XMM4; 888 889 890 //punpcklbw XMM1, XMM2; 891 892 paddusw XMM4, XMM0; //1 + alpha01 893 paddusw XMM5, XMM1; 894 psubusw XMM6, XMM0; //256 - alpha01 895 psubusw XMM7, XMM1; 896 897 //moving the values to their destinations 898 mov EBX, p[EBP]; 899 movups XMM0, src; //src01 900 movups XMM1, XMM0; //src23 901 punpcklbw XMM0, XMM2; 902 punpckhbw XMM1, XMM2; 903 pmullw XMM4, XMM0; //src01 * (1 + alpha01) 904 pmullw XMM5, XMM1; //src23 * (1 + alpha23) 905 movups XMM0, [EBX]; //dest01 906 movups XMM1, XMM0; //dest23 907 punpcklbw XMM0, XMM2; 908 punpckhbw XMM1, XMM2; 909 pmullw XMM6, XMM0; //dest01 * (256 - alpha) 910 pmullw XMM7, XMM1; //dest23 * (256 - alpha) 911 912 paddusw XMM4, XMM6; //(src01 * (1 + alpha01)) + (dest01 * (256 - alpha01)) 913 paddusw XMM5, XMM7; //(src * (1 + alpha)) + (dest * (256 - alpha)) 914 psrlw XMM4, 8; //(src * (1 + alpha)) + (dest * (256 - alpha)) / 256 915 psrlw XMM5, 8; 916 //moving the result to its place; 917 //pxor MM2, MM2; 918 packuswb XMM4, XMM5; 919 920 movups [EBX], XMM4; 921 922 //endofalgorithm: 923 924 } 925 pl += 16; 926 //c += 4; 927 //*p = [res[0],res[2],res[4],res[6]]; 928 //ubyte[4] res = *p; 929 //writeln(res); 930 931 //} 932 } 933 for(; x < sizeX - offsetXB ; x++){ 934 //ushort* c = p0 + x + offsetP; 935 936 ubyte[4] *p = cast(ubyte[4]*)pl; //(workpad + (offsetX + x)*4 + offsetY); 937 ubyte[4] src = *cast(ubyte[4]*)(palette.ptr + 4 * *c); 938 ushort[4] alpha = [src[0],src[0],src[0],src[0]]; 939 asm{ 940 //pxor XMM3, XMM3; 941 movq XMM2, alpha; 942 mov EBX, p[EBP]; 943 movd XMM0, [EBX]; 944 movd XMM1, src; 945 punpcklbw XMM0, XMM3;//dest 946 punpcklbw XMM1, XMM3;//src 947 //punpcklbw XMM2, XMM3;//alpha 948 movaps XMM4, alphaSSEConst256; 949 movaps XMM5, alphaSSEConst1; 950 951 paddusw XMM5, XMM2;//1+alpha 952 psubusw XMM4, XMM2;//256-alpha 953 954 pmullw XMM0, XMM4;//dest*(256-alpha) 955 pmullw XMM1, XMM5;//src*(1+alpha) 956 paddusw XMM0, 
XMM1;//(src*(1+alpha))+(dest*(256-alpha)) 957 psrlw XMM0, 8;//(src*(1+alpha))+(dest*(256-alpha))/256 958 //pxor XMM7, XMM7; 959 packuswb XMM0, XMM3; 960 961 movd [EBX], XMM0; 962 963 //pxor XMM0, XMM0; 964 //pxor XMM1, XMM1; 965 pxor XMM2, XMM2; 966 } 967 pl += 4; 968 c++; 969 } 970 } 971 } 972 } 973 } 974 } 975 976 /*public void updateRaster(Bitmap16Bit frameBuffer){ 977 //writeln(spriteSorter); 978 foreach_reverse(int i ; spriteSorter){ 979 /*foreach(int i ; spriteSet.byKey){*/ 980 /*if((coordinates[i].right > sX && coordinates[i].bottom > sY) && (coordinates[i].left < sX + rasterX && coordinates[i].right < sY + rasterY)) { 981 //writeln(i); 982 int offsetXA, offsetXB, offsetYA, offsetYB; 983 //if(sX > coordinates[i].xa) {offsetXA = sX - coordinates[i].xa; } 984 if(sY > coordinates[i].top) {offsetYA = sY - coordinates[i].top; } 985 //if(sX + rasterX < coordinates[i].xb) {offsetXB = sX - coordinates[i].xb - rasterX; } 986 if(sY + rasterY < coordinates[i].bottom) {offsetYB = coordinates[i].bottom - rasterY; } 987 for(int y = offsetYA ; y < coordinates[i].getYSize() - offsetYB ; y++){ 988 ushort[] chunk = (flipRegisters[i] == FlipRegister.Y || flipRegisters[i] == FlipRegister.XY) ? 
spriteSet[i].readRowReverse(y) : spriteSet[i].readRow(y);
				if(flipRegisters[i] == FlipRegister.X || flipRegisters[i] == FlipRegister.XY){
					for(int x ; x < chunk.length ; x++){
						if(coordinates[i].left - sX + x >= 0 && coordinates[i].left - sX + x < rasterX){
							if(chunk[chunk.length-x-1] != transparencyIndex) frameBuffer.writePixel(coordinates[i].left - sX + x, coordinates[i].top - sY + y, chunk[chunk.length-x-1]);
						}
					}
				}
				else{
					for(int x ; x < chunk.length ; x++){
						if(coordinates[i].left - sX + x >= 0 && coordinates[i].left - sX + x < rasterX){
							if(chunk[x] != transparencyIndex) frameBuffer.writePixel(coordinates[i].left - sX + x, coordinates[i].top - sY + y, chunk[x]);
						}
					}
				}
			}
		}
	}
}*/


}

/**
 * Sprite layer that alpha-blends 32 bit bitmaps into the output raster.
 *
 * Sprites are identified by an integer ID. updateRaster() walks the sorted ID list in
 * reverse order, so sprites with higher IDs are drawn first and lower IDs end up on top.
 * Pixels are handled as 4 bytes each, and byte 0 of every source pixel is used as the
 * alpha value for all four bytes of that pixel.
 *
 * Flip registers are stored per sprite but are not applied by updateRaster().
 * The blending code uses x86 inline assembly with 32 bit addressing (EBX/EBP) and SSE2.
 */
public class SpriteLayer32Bit : Layer, ISpriteCollision, ISpriteLayer32Bit{
	private Bitmap32Bit[int] spriteSet;
	private Coordinate[int] coordinates;		//Use moveSprite() and relMoveSprite() instead to move sprites
	private FlipRegister[int] flipRegisters;
	private int[] spriteSorter;					//Sprite IDs in ascending order, defines the drawing order
	public SpriteMovementListener[int] collisionDetector;

	public this(){

	}

	/**
	 * Stores the bitmap, position and a cleared flip register under the ID n.
	 * The ID is appended to the drawing order only if it is not registered yet, so that
	 * re-adding an existing ID replaces the sprite instead of drawing it twice.
	 */
	private void insertSprite(Bitmap32Bit s, int n, Coordinate c){
		if((n in spriteSet) is null){
			spriteSorter ~= n;
			spriteSorter.sort();
		}
		spriteSet[n] = s;
		coordinates[n] = c;
		flipRegisters[n] = FlipRegister.NORM;
	}

	/// Adds the sprite s with the ID n, occupying the area described by c.
	public void addSprite(Bitmap32Bit s, int n, Coordinate c){
		insertSprite(s, n, c);
	}

	/// Adds the sprite s with the ID n at the position x;y. The occupied area is calculated from the size of the bitmap.
	public void addSprite(Bitmap32Bit s, int n, int x, int y){
		insertSprite(s, n, Coordinate(x, y, x + s.getX, y + s.getY));
	}

	/**
	 * Replaces the bitmap of the sprite n and keeps its upper-left corner.
	 * The occupied area is recalculated from the size of the new bitmap.
	 * Does nothing if no sprite is registered under n.
	 */
	public void replaceSprite(Bitmap32Bit s, int n){
		if((n in coordinates) is null) return;
		replaceSprite(s, n, coordinates[n].left, coordinates[n].top);
	}

	/**
	 * Replaces the bitmap of the sprite n and moves it to x;y.
	 * Does nothing if no sprite is registered under n.
	 */
	public void replaceSprite(Bitmap32Bit s, int n, int x, int y){
		replaceSprite(s, n, Coordinate(x, y, x + s.getX, y + s.getY));
	}

	/**
	 * Replaces the bitmap and the occupied area of the sprite n.
	 * Does nothing if no sprite is registered under n. The flip register is left untouched
	 * and the collision listeners are not notified.
	 */
	public void replaceSprite(Bitmap32Bit s, int n, Coordinate c){
		if((n in spriteSet) is null) return;
		spriteSet[n] = s;
		coordinates[n] = c;
	}

	/// Removes every piece of data stored for the sprite n, including its entry in the drawing order.
	public void removeSprite(int n){
		coordinates.remove(n);
		flipRegisters.remove(n);
		spriteSet.remove(n);
		int[] remaining;
		foreach(id ; spriteSorter){
			if(id != n){
				remaining ~= id;
			}
		}
		spriteSorter = remaining;
	}

	/// Moves the sprite n using Coordinate.move(x,y), then notifies the collision listeners.
	public void moveSprite(int n, int x, int y){
		coordinates[n].move(x,y);
		callCollisionDetector(n);
	}

	/// Moves the sprite n using Coordinate.relMove(x,y), then notifies the collision listeners.
	public void relMoveSprite(int n, int x, int y){
		coordinates[n].relMove(x,y);
		callCollisionDetector(n);
	}

	/// Returns the internal bitmap table (not a copy).
	public Bitmap32Bit[int] getSpriteSet(){
		return spriteSet;
	}

	/// Returns the internal coordinate table (not a copy).
	public Coordinate[int] getCoordinates(){
		return coordinates;
	}

	/// Returns the internal flip register table (not a copy).
	public FlipRegister[int] getFlipRegisters(){
		return flipRegisters;
	}

	/// Always returns null: the drawing order is kept in a plain array, which does not match this return type.
	public int[int] getSpriteSorter(){
		return null;
	}

	/// Calls spriteMoved(n) on every registered listener.
	private void callCollisionDetector(int n){
		foreach(c; collisionDetector){
			c.spriteMoved(n);
		}
	}

	/**
	 * Blends every sprite that intersects the visible area into workpad.
	 *
	 * workpad is addressed as 4 bytes per pixel with pitch bytes per row.
	 * palette and threads are not used by this layer.
	 * Each output byte is calculated as (src * (1 + alpha) + dest * (256 - alpha)) / 256.
	 */
	public override void updateRaster(void* workpad, int pitch, ubyte[] palette, int[] threads){
		foreach_reverse(int i ; spriteSorter){

			if((coordinates[i].right > sX && coordinates[i].bottom > sY) && (coordinates[i].left < sX + rasterX && coordinates[i].top < sY + rasterY)) {
				//offsetXA/offsetYA: columns/rows cut off at the left/top edge of the raster
				//offsetXB/offsetYB: columns/rows cut off at the right/bottom edge of the raster
				int offsetXA, offsetXB, offsetYA, offsetYB, sizeX = coordinates[i].getXSize(), offsetX = coordinates[i].left - sX;
				if(sX > coordinates[i].left) {offsetXA = sX - coordinates[i].left; }
				if(sY > coordinates[i].top) {offsetYA = sY - coordinates[i].top; }
				//the visible area ends at sX + rasterX and sY + rasterY, so the scroll position has to be included
				if(sX + rasterX < coordinates[i].right) {offsetXB = coordinates[i].right - (sX + rasterX); }
				if(sY + rasterY < coordinates[i].bottom) {offsetYB = coordinates[i].bottom - (sY + rasterY); }
				ubyte* p0 = spriteSet[i].getPtr();
				for(int y = offsetYA ; y < coordinates[i].getYSize() - offsetYB ; y++){//for non flipped sprites
					//offsetP: byte offset of the source row, offsetY: byte offset of the destination row
					int offsetP = sizeX * y * 4, offsetY = (coordinates[i].top - sY + y)*pitch;
					int x = offsetXA;
					//both pointers advance in bytes, so pixel positions are multiplied by 4
					ubyte* c = p0 + x * 4 + offsetP;
					void* pl = workpad + (offsetX + x) * 4 + offsetY;
					//four pixels at once
					for(; x < sizeX - offsetXB - 3 ; x+=4){
						ubyte[16] *p = cast(ubyte[16]*)pl;
						ubyte[16] src = *cast(ubyte[16]*)c;
						//byte 0 of each pixel repeated for all four bytes of that pixel
						ubyte[16] alpha = [src[0],src[0],src[0],src[0],src[4],src[4],src[4],src[4],src[8],src[8],src[8],src[8],src[12],src[12],src[12],src[12]];

						asm{
							//XMM2 is the zero register for unpacking, it must not depend on earlier code
							pxor		XMM2, XMM2;

							//calculating alpha
							movups		XMM0, alpha;	//a01
							movups		XMM1, XMM0;		//a23
							punpcklbw	XMM0, XMM2;
							punpckhbw	XMM1, XMM2;
							movups		XMM6, alphaSSEConst256;	//unaligned load, the constants have no declared alignment
							movaps		XMM7, XMM6;
							movups		XMM4, alphaSSEConst1;
							movaps		XMM5, XMM4;

							paddusw		XMM4, XMM0;		//1 + alpha01
							paddusw		XMM5, XMM1;		//1 + alpha23
							psubusw		XMM6, XMM0;		//256 - alpha01
							psubusw		XMM7, XMM1;		//256 - alpha23

							//moving the values to their destinations
							mov			EBX, p[EBP];
							movups		XMM0, src;		//src01
							movups		XMM1, XMM0;		//src23
							punpcklbw	XMM0, XMM2;
							punpckhbw	XMM1, XMM2;
							pmullw		XMM4, XMM0;		//src01 * (1 + alpha01)
							pmullw		XMM5, XMM1;		//src23 * (1 + alpha23)
							movups		XMM0, [EBX];	//dest01
							movups		XMM1, XMM0;		//dest23
							punpcklbw	XMM0, XMM2;
							punpckhbw	XMM1, XMM2;
							pmullw		XMM6, XMM0;		//dest01 * (256 - alpha01)
							pmullw		XMM7, XMM1;		//dest23 * (256 - alpha23)

							paddusw		XMM4, XMM6;		//(src01 * (1 + alpha01)) + (dest01 * (256 - alpha01))
							paddusw		XMM5, XMM7;		//(src23 * (1 + alpha23)) + (dest23 * (256 - alpha23))
							psrlw		XMM4, 8;		//divide by 256
							psrlw		XMM5, 8;
							//moving the result to its place
							packuswb	XMM4, XMM5;

							movups		[EBX], XMM4;
						}
						pl += 16;
						c += 16;
					}
					//remaining pixels of the row, one at a time
					for(; x < sizeX - offsetXB ; x++){
						ubyte[4] *p = cast(ubyte[4]*)pl;
						ubyte[4] src = *cast(ubyte[4]*)c;
						ushort[4] alpha = [src[0],src[0],src[0],src[0]];
						asm{
							//XMM3 is the zero register for unpacking and packing
							pxor		XMM3, XMM3;
							movq		XMM2, alpha;
							mov			EBX, p[EBP];
							movd		XMM0, [EBX];
							movd		XMM1, src;
							punpcklbw	XMM0, XMM3;		//dest
							punpcklbw	XMM1, XMM3;		//src

							movups		XMM4, alphaSSEConst256;	//unaligned load, the constants have no declared alignment
							movups		XMM5, alphaSSEConst1;

							paddusw		XMM5, XMM2;		//1 + alpha
							psubusw		XMM4, XMM2;		//256 - alpha

							pmullw		XMM0, XMM4;		//dest * (256 - alpha)
							pmullw		XMM1, XMM5;		//src * (1 + alpha)
							paddusw		XMM0, XMM1;		//(src * (1 + alpha)) + (dest * (256 - alpha))
							psrlw		XMM0, 8;		//divide by 256
							packuswb	XMM0, XMM3;
							movd		[EBX], XMM0;
						}
						pl += 4;
						c += 4;
					}

				}
			}
		}
	}
}