Re: Display functions optimization
Posted: Wed Jan 14, 2015 4:12 am
jonnection wrote: I operate on whole bytes at a time using shifts and masks. This means that all my drawing routines draw a byte at a time, i.e. there is no putpixel. It also means that I have different routines for horizontal and vertical orientation, and that clipping against the screen boundaries is more complicated.
That sounds similar to what I was discussing regarding storing bitmaps in the same format as the LCD buffer. Doing that in my little demo gets drawBitmap down to ~155µs per call (~14 times faster than the library version); additionally aligning y to a multiple of 8 gets it to ~50µs (43x faster). And that's without any loop unrolling or hand-tweaked assembly:
- Code: Select all
#include <SPI.h>
#include <Gamebuino.h>
// Global Gamebuino library object; this sketch uses it for gb.begin(),
// gb.update() and everything under gb.display.
Gamebuino gb;
// 16x16 test sprite in the row-major layout read by drawBitmapUnrolled():
// a width byte, a height byte, then two bytes per pixel row with the most
// significant bit of each byte being the leftmost pixel.
const byte sprite[] PROGMEM = {
  16, 16,       // width, height
  0x1f, 0xf8,   // row 0
  0x1f, 0xf8,   // row 1
  0x1f, 0xfc,   // row 2
  0x1f, 0xff,   // row 3
  0x1f, 0xff,   // row 4
  0x0f, 0xff,   // row 5
  0x0f, 0xff,   // row 6
  0x07, 0xff,   // row 7
  0x87, 0xff,   // row 8
  0x03, 0xff,   // row 9
  0x01, 0xff,   // row 10
  0x00, 0x7f,   // row 11
  0x02, 0x1f,   // row 12
  0x00, 0x00,   // row 13
  0x00, 0x00,   // row 14
  0x40, 0x00,   // row 15
};
// The same 16x16 image as `sprite`, re-encoded ("swizzled") in the layout the
// drawBitmapSwizzled*() routines read: a width byte, a height byte, then one
// byte per column for each band of 8 pixel rows. Within a byte, bit 0 is the
// topmost row of the band.
const byte swizzled_sprite[] PROGMEM = {
16, 16,
// rows 0-7, columns 0..15
0x00,0x00,0x00,0x1f,0x7f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xfc,0xf8,0xf8,
// rows 8-15, columns 0..15
0x01,0x80,0x00,0x00,0x00,0x01,0x13,0x07,0x07,0x0f,0x0f,0x1f,0x1f,0x1f,0x1f,0x1f
};
// Arduino initialisation hook: starts the Gamebuino library via gb.begin().
void setup()
{
  gb.begin();
}
// Benchmark driver. Each time gb.update() returns true, every drawing variant
// is called `count` times and the average time per call is printed.
//
// millis() reports milliseconds, so 1000L * (finish - start) / count is the
// average in MICROSECONDS per call; the labels therefore read "us". With
// count = 200 the resolution of each figure is 1 ms / 200 = 5 us.
void loop(){
  long start, finish;
  int drawTime;
  if(gb.update()){
    const int count = 200;

    // Library implementation, used as the baseline.
    start = millis();
    for (int i=0; i<count; i++) {
      gb.display.drawBitmap(1, 31, sprite);
    }
    finish = millis();
    drawTime = 1000L*(finish-start)/count;
    gb.display.print(F("drawBitmap: "));
    gb.display.print(drawTime);
    gb.display.println(F("us"));

    // Row-major bitmap, inner loop unrolled eight pixels at a time.
    start = millis();
    for (int i=0; i<count; i++) {
      drawBitmapUnrolled(17, 31, sprite);
    }
    finish = millis();
    drawTime = 1000L*(finish-start)/count;
    gb.display.print(F("unrolled: "));
    gb.display.print(drawTime);
    gb.display.println(F("us"));

    // Bitmap stored in the screen-buffer layout, arbitrary y.
    start = millis();
    for (int i=0; i<count; i++) {
      drawBitmapSwizzled(33, 31, swizzled_sprite);
    }
    finish = millis();
    drawTime = 1000L*(finish-start)/count;
    gb.display.print(F("swizzled: "));
    gb.display.print(drawTime);
    gb.display.println(F("us"));

    // Bitmap stored in the screen-buffer layout, y a multiple of 8.
    start = millis();
    for (int i=0; i<count; i++) {
      drawBitmapSwizzledAligned(49, 32, swizzled_sprite);
    }
    finish = millis();
    drawTime = 1000L*(finish-start)/count;
    gb.display.print(F("aligned: "));
    gb.display.print(drawTime);
    gb.display.println(F("us"));
  }
}
// Draws a row-major bitmap into the display buffer, eight pixels per step.
//
// x, y    top-left destination pixel.
// bitmap  program-memory pointer to: width byte, height byte, then
//         (width + 7) / 8 bytes per row, most significant bit leftmost.
//
// Set source bits are OR-ed into the buffer; clear bits leave it untouched.
// The buffer is addressed as (row / 8) * LCDWIDTH_NOROT + column, with bit
// (row & 7) of that byte selecting the pixel. The whole-byte path writes
// directly and performs no clipping, so the caller must keep it on screen;
// only the leftover (width % 8) pixels go through gb.display.drawPixel().
void drawBitmapUnrolled(int8_t x, int8_t y, const uint8_t *bitmap) {
  int8_t w = pgm_read_byte(bitmap);
  int8_t h = pgm_read_byte(bitmap + 1);
  bitmap = bitmap + 2; // skip the width and height bytes
  int8_t byteWidth = (w + 7) >> 3;
  uint8_t *screen_line = gb.display.getBuffer() + x;
  const uint8_t *bitmap_line = bitmap;
  uint8_t dsty = y;
  uint8_t mask = _BV(y & 7);
  // The mask walks down one bit per row and wraps from bit 7 back to bit 0
  // whenever the destination moves into the next 8-row band.
  for (int8_t j = 0; j < h; j++, dsty++, bitmap_line += byteWidth, mask = (mask & 0x80) ? 1 : (mask << 1))
  {
    int ofs = (dsty >> 3) * LCDWIDTH_NOROT;
    int8_t dstx = x;
    const uint8_t *bitmap_src = bitmap_line;
    uint8_t *ptr = screen_line + ofs;
    int8_t remaining = w;
    while (remaining >= 8)
    {
      uint8_t pixels = pgm_read_byte(bitmap_src++);
      if (pixels & 0x80)
        ptr[0] |= mask;
      if (pixels & 0x40)
        ptr[1] |= mask;
      if (pixels & 0x20)
        ptr[2] |= mask;
      if (pixels & 0x10)
        ptr[3] |= mask;
      if (pixels & 0x08)
        ptr[4] |= mask;
      if (pixels & 0x04)
        ptr[5] |= mask;
      if (pixels & 0x02)
        ptr[6] |= mask;
      if (pixels & 0x01)
        ptr[7] |= mask;
      ptr += 8;
      remaining -= 8;
      dstx += 8;
    }
    if (remaining)
    {
      // Leftover pixels of a width that is not a multiple of 8.
      uint8_t pixels = pgm_read_byte(bitmap_src);
      while (remaining--)
      {
        if (pixels & 0x80)
          gb.display.drawPixel(dstx, dsty);
        pixels <<= 1;
        dstx++; // advance one column per source bit (was missing)
      }
    }
  }
}
// Draws a bitmap stored in the screen-buffer layout at any vertical position.
//
// x, y    top-left destination pixel.
// bitmap  program-memory pointer to: width byte, height byte, then one byte
//         per column for each band of 8 rows (bit 0 = top row of the band).
//
// Each source byte is shifted down by (y & 7) pixels: the low 8 bits of the
// shifted value are OR-ed into the current buffer row, and the bits that
// spill past bit 7 are OR-ed into the row below. The shift is done as a
// multiplication by a power of two, as in the original code. No clipping is
// performed.
void drawBitmapSwizzled(int8_t x, int8_t y, const uint8_t *bitmap) {
  int8_t w = pgm_read_byte(bitmap);
  int8_t h = pgm_read_byte(bitmap + 1);
  bitmap = bitmap + 2; // skip the width and height bytes
  uint8_t *line = gb.display.getBuffer() + (y >> 3) * LCDWIDTH_NOROT + x;
  const uint8_t shift = y & 7;
  // Unsigned shifts: 1 << 15 would overflow a signed 16-bit int.
  const uint16_t scale1 = (uint16_t)(1u << (shift + 8));
  const uint16_t scale2 = (uint16_t)(1u << shift);
  for (int band = 0; band < h; band += 8, line += LCDWIDTH_NOROT)
  {
    uint8_t *dst = line;
    for (int col = 0; col < w; col++, dst++)
    {
      uint8_t pixels = pgm_read_byte(bitmap++);
      // (pixels << shift) & 0xff: the part that stays in this buffer row.
      *dst |= (uint8_t)((uint16_t)(pixels * scale1) >> 8);
      // (pixels << shift) >> 8: the part that spills into the row below.
      // With shift == 0 nothing spills, so skip the access entirely; this
      // also avoids touching memory past the buffer when an aligned sprite
      // ends on the last buffer row.
      if (shift != 0)
        *(dst + LCDWIDTH_NOROT) |= (uint8_t)((uint16_t)(pixels * scale2) >> 8);
    }
  }
}
// Draws a bitmap stored in the screen-buffer layout when y is a multiple of 8.
//
// x, y    top-left destination pixel; only y >> 3 is used, so the low three
//         bits of y are ignored.
// bitmap  program-memory pointer to: width byte, height byte, then one byte
//         per column for each band of 8 rows.
//
// Source bytes line up exactly with buffer bytes, so they are OR-ed in two
// columns at a time through 16-bit accesses, with a single-byte step for the
// last column of an odd width. No clipping is performed.
// NOTE(review): the 16-bit store assumes the target tolerates unaligned
// 16-bit access to the buffer (true on 8-bit AVR) - confirm before porting.
void drawBitmapSwizzledAligned(int8_t x, int8_t y, const uint8_t *bitmap) {
  int8_t w = pgm_read_byte(bitmap);
  int8_t h = pgm_read_byte(bitmap + 1);
  bitmap = bitmap + 2; // skip the width and height bytes
  uint8_t *line = gb.display.getBuffer() + (y >> 3) * LCDWIDTH_NOROT + x;
  // After each band `line` has advanced by w, so adding LCDWIDTH_NOROT - w
  // lands on the same column one buffer row down.
  for (int band = 0; band < h; band += 8, line += LCDWIDTH_NOROT - w)
  {
    int col = 0;
    for (; col + 1 < w; col += 2, line += 2, bitmap += 2)
      *(uint16_t *)line |= pgm_read_word(bitmap);
    if (col < w)
    {
      // Odd width: finish the band with one byte so that neither the source
      // nor the destination runs one column too far.
      *line |= pgm_read_byte(bitmap);
      line++;
      bitmap++;
    }
  }
}