#include <stdio.h>
#include <string.h>
#include <emmintrin.h>

/*
 * Header of a BMP file as it is laid out on disk: the file header
 * followed directly by the info header.  The struct is packed so that no
 * padding is inserted between fields (54 bytes in total with 16-bit short
 * and 32-bit int).
 */
typedef struct {
	/* --- file header --- */
	unsigned short id;              /* file signature */
	unsigned int filesize;          /* size of the whole file, in bytes */
	unsigned short reserved1;
	unsigned short reserved2;
	unsigned int offset;            /* offset of the pixel data from the start of the file */

	/* --- info header --- */
	unsigned int size;              /* size of the info header, in bytes */
	int width;                      /* image width, in pixels */
	int height;                     /* image height, in pixels */
	unsigned short planes;
	unsigned short bits;            /* bits per pixel */
	unsigned int compression;
	unsigned int imagesize;
	int xresolution;
	int yresolution;
	unsigned int ncolours;
	unsigned int importantcolours;
} __attribute__((packed)) header;

/*
 * Write a 24-bit uncompressed BMP file.
 *
 * filename: path of the file to create (an existing file is truncated).
 * bitmap:   width*height pixels of 3 bytes each, rows stored back to back
 *           without any padding.
 * width, height: image dimensions in pixels; negative values are rejected.
 *
 * The function returns nothing; failures are reported on stderr.
 */
void writebitmap(char* filename, char*bitmap, int width, int height)
{
	static const unsigned char zero_pad[3] = { 0, 0, 0 };
	FILE* file;
	header head;
	size_t row_bytes;
	size_t pad_bytes;
	int row;
	int ok = 1;

	if (filename == NULL || bitmap == NULL || width < 0 || height < 0) {
		fprintf(stderr, "writebitmap: invalid arguments\n");
		return;
	}

	/* every pixel row in a BMP file is padded to a multiple of 4 bytes */
	row_bytes = (size_t)width * 3;
	pad_bytes = (4 - row_bytes % 4) % 4;

	/*
	 * 0x4D42 is stored as the bytes 'B','M' on a little-endian host.
	 * (The multi-character constant 'MB' has an implementation-defined
	 * value, so the number is spelled out.)
	 */
	head.id = 0x4D42;
	head.filesize = (unsigned int)(sizeof(header)
			+ (row_bytes + pad_bytes) * (size_t)height);
	head.reserved1 = 0;
	head.reserved2 = 0;
	head.offset = sizeof(header);
	head.size = 40;
	head.width = width;
	head.height = height;
	head.planes = 1;
	head.bits = 24;
	head.compression = 0;
	head.imagesize = 0;
	head.xresolution = 0;
	head.yresolution = 0;
	head.ncolours = 0;
	head.importantcolours = 0;

	/* binary mode: the data must not go through newline translation */
	file = fopen(filename, "wb");
	if (file == NULL) {
		perror(filename);
		return;
	}

	if (fwrite(&head, sizeof(header), 1, file) != 1)
		ok = 0;

	for (row = 0; ok && row < height; row++) {
		const char *line = bitmap + (size_t)row * row_bytes;

		if (fwrite(line, 1, row_bytes, file) != row_bytes
		    || fwrite(zero_pad, 1, pad_bytes, file) != pad_bytes)
			ok = 0;
	}

	/* fclose flushes buffered data, so its result matters as well */
	if (fclose(file) != 0)
		ok = 0;

	if (!ok)
		fprintf(stderr, "writebitmap: error while writing %s\n", filename);
}

/* Image dimensions in pixels. */
#define WIDTH 1024
#define HEIGHT 768
unsigned char video[WIDTH*HEIGHT*3]; // storage for the image pixels, 3 bytes per pixel
int width=WIDTH;
int height=HEIGHT;
int bpp=3; // bytes per pixel, used by set_pix() to compute buffer offsets

__v4sf two; // vector constant (2,2,2,2), assigned in main()
__v4sf eight; // vector constant (8,8,8,8), assigned in main()


/*
 * Store one pixel in the video buffer.
 *
 * x, y:    pixel coordinates; anything outside the image is silently ignored.
 * r, g, b: channel values, written in that order to three consecutive bytes
 *          (each value is converted to unsigned char on store).
 *
 * NOTE(review): writebitmap() dumps this buffer unchanged into a 24-bit BMP,
 * whose pixels are conventionally read as blue, green, red - confirm that
 * the channel order here is the intended one.
 */
void set_pix(int x, int y, int r, int g, int b)
{
	unsigned char *pixel;

	/* reject coordinates that fall outside the image */
	if (x < 0 || y < 0 || x >= width || y >= height)
		return;

	/* write straight into video memory */
	pixel = &video[bpp * (x + y * width)];
	pixel[0] = r;
	pixel[1] = g;
	pixel[2] = b;
}

/*
 * Plot a pixel whose colour is picked from a built-in gradient.
 *
 * x, y: pixel coordinates, forwarded to set_pix().
 * pos:  palette index.  0 leaves the pixel untouched, so it keeps whatever
 *       the buffer already holds (the zero-initialised buffer is black).
 *       1..63 is a grey ramp, 64..127 and 128 and above are colour ramps.
 */
void set_pix_palette(int x, int y, int pos)
{
	int r,g,b;
	/* keep the interior black */
	if(pos == 0) return;
	/* try to make it look nice ... colourful, that is */
	if(pos < 64) {
		r = g = b = pos*2;
	} else if(pos < 128) {
		b = 128 + 2*(pos-64);
		g = 128 - 2*(pos-64);
		r = 128 - 2*(pos-64);
	} else {
		b = 256 - 2*(pos - 128);
		/*
		 * pos == 128 yields 256, which would wrap to 0 once stored in a
		 * byte and punch a black pixel into the gradient; saturate it.
		 */
		if(b > 255) b = 255;
		g = 0;
		r = (pos - 128);
	}
	set_pix(x,y,r,g,b);
}

/*
 * Convert four horizontally adjacent pixels to floating-point coordinates.
 *
 * x, y: integer coordinates of the first pixel; the other three are
 *       x+1, x+2 and x+3 on the same row.
 * ppx:  receives (column - height) / (height/2) for each pixel, with column
 *       x in the highest lane and x+3 in the lowest.
 * ppy:  receives (y - height/2) / (height/2) in all four lanes.
 *
 * The horizontal origin is shifted to the right (by a full height rather
 * than half of it) to make better use of the available space.
 */
void convert_i2d4(int x, int y, __v4sf *ppx, __v4sf *ppy)
{
	const __v4sf full_h = _mm_set1_ps(height);
	const __v4sf half_h = _mm_div_ps(full_h, two);
	const __v4sf columns = _mm_set_ps(x, x+1, x+2, x+3);
	const __v4sf row = _mm_set1_ps(y);

	*ppx = _mm_div_ps(_mm_sub_ps(columns, full_h), half_h);
	*ppy = _mm_div_ps(_mm_sub_ps(row, half_h), half_h);
}


/*
 * One step of the fractal iteration p = p^2 + C, for four points at once.
 *
 * cx, cy:   real and imaginary parts of the constant C.
 * ppx, ppy: real and imaginary parts of p; updated in place.
 *
 * Returns |p|^2 of the updated p, lane by lane.
 */
__v4sf iteration4(__v4sf cx, __v4sf cy, __v4sf *ppx, __v4sf *ppy)
{
	const __v4sf re = *ppx;
	const __v4sf im = *ppy;

	/* (re + i*im)^2 = (re^2 - im^2) + i*(2*re*im) */
	const __v4sf re_sq = _mm_mul_ps(re, re);
	const __v4sf im_sq = _mm_mul_ps(im, im);
	const __v4sf re_im = _mm_mul_ps(re, im);

	const __v4sf next_re = _mm_add_ps(_mm_sub_ps(re_sq, im_sq), cx);
	const __v4sf next_im = _mm_add_ps(_mm_mul_ps(two, re_im), cy);

	*ppx = next_re;
	*ppy = next_im;

	return _mm_add_ps(_mm_mul_ps(next_re, next_re),
			  _mm_mul_ps(next_im, next_im));
}

/*
 * Render the whole fractal into the video buffer.
 *
 * Pixels are processed four at a time along a row.  For each group the
 * iteration is run up to 255 times, and the number of steps during which a
 * point stayed within the escape bound decides its palette colour.
 */
void display()
{
	const int max_iter = 255;
	int x, y, i;
	int counts[4];
	__v4sf cx, cy;
	__v4sf px, py;
	__v4sf radius;
	__v4sf inside;
	__m128i acc;

	/* walk the image row by row so that writes follow the buffer layout */
	for (y = 0; y < height; y++) {
		for (x = 0; x < width; x += 4) {
			convert_i2d4(x, y, &cx, &cy);
			px = cx;
			py = cy;
			acc = _mm_setzero_si128();

			/* count the iterations: the count selects the colour */
			for (i = 0; i < max_iter; i++) {
				/* squared magnitude of each of the four points */
				radius = iteration4(cx, cy, &px, &py);
				/*
				 * Compare with the bound: every lane becomes all
				 * ones (-1 as an integer) while the point is still
				 * inside, and 0 otherwise.
				 */
				inside = _mm_cmple_ps(radius, eight);
				/* add up the -1s, lane by lane */
				acc = _mm_add_epi32(acc, _mm_castps_si128(inside));
				/* once all four points are outside there is nothing left to compute */
				if (_mm_movemask_ps(inside) == 0)
					break;
			}

			/*
			 * Copy the lanes out through a store instead of reading
			 * the vector through an int pointer.
			 */
			_mm_storeu_si128((__m128i *)counts, acc);

			/* convert_i2d4() puts pixel x in the highest lane and x+3 in the lowest */
			set_pix_palette(x,     y, max_iter + counts[3]);
			set_pix_palette(x + 1, y, max_iter + counts[2]);
			set_pix_palette(x + 2, y, max_iter + counts[1]);
			set_pix_palette(x + 3, y, max_iter + counts[0]);
		}
	}
}

/* Program entry point: render the fractal and save it as /tmp/test.bmp. */
int main(void)
{
	width = WIDTH;
	height = HEIGHT;
	/* vector constants read by convert_i2d4(), iteration4() and display() */
	two = _mm_set1_ps(2.0f);
	eight = _mm_set1_ps(8.0f);
	display();
	/* video holds unsigned bytes while writebitmap() takes a plain char pointer */
	writebitmap("/tmp/test.bmp", (char *)video, width, height);
	return 0;
}
