Block size is still 8.
...
...
if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
// find box test offsets for each edge
int eo1 = 0;
if (FDY12 < 0) eo1 -= FDY12 << 3; //meaning y1 is above y2, block size is 8 ==> shl 3
if (FDX12 > 0) eo1 += FDX12 << 3; //meaning x1 is right of x2
int eo2 = 0;
if (FDY23 < 0) eo2 -= FDY23 << 3; //meaning y2 is above y3
if (FDX23 > 0) eo2 += FDX23 << 3; //meaning x2 is right of x3
int eo3 = 0;
if (FDY31 < 0) eo3 -= FDY31 << 3; //meaning y3 is above y1
if (FDX31 > 0) eo3 += FDX31 << 3; //meaning x3 is right of x1
//these are the offsets fo the bottom-right block corner
int ei1 = (DX12 << 7) - (DY12 << 7) - eo1; //block size is 8 ==> shl 3 + 4
int ei2 = (DX23 << 7) - (DY23 << 7) - eo2;
int ei3 = (DX31 << 7) - (DY31 << 7) - eo3;
// Loop through blocks
for(int y = miny; y < maxy; y += q) {
unsigned char filledFlag = 0; //variable that states if we have found a somehow filled block already
for(int x = minx; x < maxx; x += q) {
// Corners of block
int x0 = x << 4;
int y0 = y << 4;
int CX1 = C1 + DX12 * y0 - DY12 * x0;
int CX2 = C2 + DX23 * y0 - DY23 * x0;
int CX3 = C3 + DX31 * y0 - DY31 * x0;
// Skip block when outside an edge
if((CX1+eo1) < 0 || (CX2+eo2) < 0 || (CX3+eo3) < 0) {
unsigned int *cbuffer = tempBuffer;
if (filledFlag & 0x80) {
//we have hit a filled block before, but now we've hit an empty one. we can skip to the next line.
break;
}
else {
//we have NOT hit a filled block before, so we just skip to the next one.
continue;
}
}
//Accept whole block when totally covered
if((CX1+ei1) > 0 && (CX2+ei2) > 0 && (CX3+ei3) > 0) {
unsigned int *cbuffer = tempBuffer;
for(int iy = 0; iy < q; iy++) {
for(int ix = x; ix < x + q; ix++) {
cbuffer[ix] = 0x00007F00; // Green
}
cbuffer = (unsigned int *)((char *)cbuffer + target->bytesPerLine);
}
}
else {
// Partially covered block
unsigned int *cbuffer = tempBuffer;
int CY1 = CX1;
int CY2 = CX2;
int CY3 = CX3;
for(int iy = y; iy < y + q; iy++) {
CX1 = CY1;
CX2 = CY2;
CX3 = CY3;
for(int ix = x; ix < x + q; ix++) {
if(CX1 > 0 && CX2 > 0 && CX3 > 0) {
cbuffer[ix] = 0x0000007F; // Blue
}
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
cbuffer = (unsigned int *)((char *)cbuffer + target->bytesPerLine);
}
}
filledFlag = 0x80;
}
tempBuffer = (unsigned int *)((char *)tempBuffer + q * target->bytesPerLine);
}
};
Though there are far fewer calculations going on, the code is not much faster (as in blazing fast) than the original version, because eo1/eo2/eo3 are kept in memory.
The code to calculate and test for "fully inside" (ei1, ei2, ei3) slows down the algorithm a lot too, probably because of the lack of registers. There might be another way of calculating those offsets from the eoX variables.
A slight improvement is the filledFlag variable, which lets us skip to the next line of blocks once we have encountered a filled block followed by an empty one (because that means the rest of the line is empty). I tried the same mechanism for partially filled blocks too (skipping lines of pixels), but noticed no improvement.
Comments and flames are welcome ;)













