[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

memmove,memset



鈴木(康)です。

mgl2 のチューニングの過程で、memmove と memset がとても遅いという
ことに気が付きました。( ちなみに memcpy は builtin 版)

memcpy       :  11179 k byte/sec
memmove_fwd  :   1751 k byte/sec
memmove_back :   1748 k byte/sec
memset       :   2627 k byte/sec

xmemset      :  17367 k byte/sec
xmemmove_fwd :  10440 k byte/sec
xmemmove_back:  10423 k byte/sec


ベンチマークプログラム (と mgl で使おうと思っている C 版)を
添付します。

--- ここから
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/time.h>

/*
 * Return the current time of day, as reported by gettimeofday(),
 * converted to milliseconds.
 */
static long long millitime(void) {
        struct timeval n;
        gettimeofday(&n, NULL);
        /*
         * Widen tv_sec before scaling: where tv_sec is a 32-bit type,
         * tv_sec * 1000 overflows long before the result is converted
         * to long long.
         */
        return ((long long)n.tv_sec * 1000 + n.tv_usec / 1000);
}

/*
 * Fill `bytes` bytes starting at `dst` with the low 8 bits of `data`.
 *
 * Leading bytes are written one at a time until the destination is
 * 4-byte aligned; the bulk is then written as 32-bit words, eight per
 * loop iteration, followed by any remaining words and trailing bytes.
 *
 * Returns `dst`.
 *
 * NOTE(review): the word stores go through a uint32_t pointer into
 * caller-supplied memory; this relies on the compiler tolerating such
 * access to byte buffers.
 */
void *
mgl_memset(void *dst, int data, size_t bytes) {
	unsigned char *dst1 = dst;
	const unsigned char byte = (unsigned char)data;
	uint32_t *dst4;
	uint32_t data32;
	size_t words;
	size_t blocks;

	/* Byte stores until the destination is 4-byte aligned. */
	while (((uintptr_t)dst1 & 0x3) && bytes) {
		*dst1++ = byte;
		bytes--;
	}
	if (!bytes) return dst;

	dst4 = (uint32_t *)dst1;
	words = bytes / 4;	/* whole 32-bit words left */
	blocks = words / 8;	/* whole 8-word (32-byte) blocks */
	bytes -= words * 4;	/* trailing bytes after the words */
	words -= blocks * 8;	/* words left after the blocks */

	/*
	 * Replicate the byte into all four lanes.  The multiply is done
	 * in unsigned arithmetic: as a signed int it overflows for byte
	 * values of 0x80 and above.
	 */
	data32 = UINT32_C(0x01010101) * byte;

	while (blocks--) {
		dst4[0] = data32;
		dst4[1] = data32;
		dst4[2] = data32;
		dst4[3] = data32;
		dst4[4] = data32;
		dst4[5] = data32;
		dst4[6] = data32;
		dst4[7] = data32;
		dst4 += 8;
	}
	while (words--) {
		*dst4++ = data32;
	}

	dst1 = (unsigned char *)dst4;
	while (bytes) {
		*dst1++ = byte;
		bytes--;
	}
	return dst;
}

/*
 * Copy `bytes` bytes ending just below `d_end` from the bytes ending
 * just below `s_end`, working from high addresses to low.  Used when
 * the destination lies above the source.
 */
static void
mgl_copy_backward(unsigned char *d_end, const unsigned char *s_end, size_t bytes) {
	/*
	 * Byte copy until the destination is 4-byte aligned.  The count
	 * is tested before it is decremented so that it can never wrap
	 * below zero when the copy ends inside this loop.
	 */
	while (((uintptr_t)d_end & 0x3) && bytes) {
		*(--d_end) = *(--s_end);
		bytes--;
	}
	/* Word copy is only possible if the source is now aligned too. */
	if (!((uintptr_t)s_end & 0x3)) {
		uint32_t *d4 = (uint32_t *)d_end;
		const uint32_t *s4 = (const uint32_t *)s_end;
		size_t words = bytes / 4;
		size_t blocks = words / 8;

		bytes -= words * 4;
		words -= blocks * 8;
		while (blocks--) {
			d4 -= 8;
			s4 -= 8;
			/*
			 * Highest word first: with an overlap distance
			 * below 32 bytes, storing d4[0] first would
			 * overwrite source words that are still unread.
			 */
			d4[7] = s4[7];
			d4[6] = s4[6];
			d4[5] = s4[5];
			d4[4] = s4[4];
			d4[3] = s4[3];
			d4[2] = s4[2];
			d4[1] = s4[1];
			d4[0] = s4[0];
		}
		while (words--) {
			*(--d4) = *(--s4);
		}
		d_end = (unsigned char *)d4;
		s_end = (const unsigned char *)s4;
	}
	/* Remaining bytes (or the whole copy if alignments differ). */
	while (bytes) {
		*(--d_end) = *(--s_end);
		bytes--;
	}
}

/*
 * Copy `bytes` bytes from `s` to `d`, working from low addresses to
 * high.  Used when the destination does not lie above the source.
 */
static void
mgl_copy_forward(unsigned char *d, const unsigned char *s, size_t bytes) {
	/* Byte copy until the destination is 4-byte aligned. */
	while (((uintptr_t)d & 0x3) && bytes) {
		*d++ = *s++;
		bytes--;
	}
	/* Word copy is only possible if the source is now aligned too. */
	if (!((uintptr_t)s & 0x3)) {
		uint32_t *d4 = (uint32_t *)d;
		const uint32_t *s4 = (const uint32_t *)s;
		size_t words = bytes / 4;
		size_t blocks = words / 8;

		bytes -= words * 4;
		words -= blocks * 8;
		while (blocks--) {
			d4[0] = s4[0];
			d4[1] = s4[1];
			d4[2] = s4[2];
			d4[3] = s4[3];
			d4[4] = s4[4];
			d4[5] = s4[5];
			d4[6] = s4[6];
			d4[7] = s4[7];
			d4 += 8;
			s4 += 8;
		}
		while (words--) {
			*d4++ = *s4++;
		}
		d = (unsigned char *)d4;
		s = (const unsigned char *)s4;
	}
	/* Remaining bytes (or the whole copy if alignments differ). */
	while (bytes) {
		*d++ = *s++;
		bytes--;
	}
}

/*
 * Copy `bytes` bytes from `src` to `dst`; the two regions may overlap.
 *
 * The copy runs backward when `dst` is above `src` and forward
 * otherwise, so overlapping source bytes are read before they are
 * overwritten.  When `dst` and `src` have the same alignment modulo 4
 * the bulk is moved as 32-bit words, eight per loop iteration;
 * otherwise everything is moved byte by byte.
 *
 * Returns `dst`.
 *
 * NOTE(review): the word loads/stores go through uint32_t pointers
 * into caller-supplied memory; this relies on the compiler tolerating
 * such access to byte buffers.
 */
void *
mgl_memmove(void *dst,const void *src, size_t bytes) {
	if ((uintptr_t)dst > (uintptr_t)src) {
		mgl_copy_backward((unsigned char *)dst + bytes,
				  (const unsigned char *)src + bytes, bytes);
	} else {
		mgl_copy_forward(dst, src, bytes);
	}
	return dst;
}

/* Bytes moved or filled by a single call under test. */
#define BLOCK_SIZE	1000
/* Calls per pass; together with BLOCK_SIZE this sizes the buffers. */
#define BLOCK_COUNT	100

/* Benchmark buffers, walked in BLOCK_SIZE steps. */
char buf1[BLOCK_SIZE*BLOCK_COUNT];
char buf2[BLOCK_SIZE*BLOCK_COUNT];


/* Passes over the whole buffer per benchmark. */
#define LOOP_COUNT	1000

/*
 * Benchmark driver.
 *
 * Runs seven benchmarks in turn: the library memcpy, memmove (in both
 * argument orders) and memset, then mgl_memset and mgl_memmove (in
 * both argument orders).  Each benchmark makes LOOP_COUNT passes of
 * BLOCK_COUNT calls of BLOCK_SIZE bytes and prints one line with the
 * throughput, computed as bytes per elapsed millisecond.
 *
 * NOTE(review): the "fwd"/"back" labels presumably assume buf1 is
 * placed at a lower address than buf2, since the copy direction is
 * chosen by comparing the pointers - confirm for the target linker.
 */
int
main(int argc,char *argv[]) {
	static const char *const names[] = {
		"memcpy       ",
		"memmove_fwd  ",
		"memmove_back ",
		"memset       ",
		"xmemset      ",
		"xmemmove_fwd ",
		"xmemmove_back"
	};
	int i,j;
	long long s,e;
	int dps;
	int type;
	char *b1,*b2;

	(void)argc;
	(void)argv;

	for (type = 0 ; type < 7; type++) {
	  s = millitime();
	  for (i=0; i< LOOP_COUNT; i++) {
	    b1 = buf1;
	    b2 = buf2;
	    /*
	     * The benchmark is selected outside the inner loop so that
	     * the timed loop contains only the call and the pointer
	     * bumps.
	     */
	    switch (type) {
	    case 0:
		for (j=0; j<BLOCK_COUNT; j++) {
		 memcpy(b1,b2,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		 b2+=BLOCK_SIZE;
		}
		break;
	    case 1:
		for (j=0; j<BLOCK_COUNT; j++) {
		 memmove(b1,b2,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		 b2+=BLOCK_SIZE;
		}
		break;
	    case 2:
		for (j=0; j<BLOCK_COUNT; j++) {
		 memmove(b2,b1,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		 b2+=BLOCK_SIZE;
		}
		break;
	    case 3:
		for (j=0; j<BLOCK_COUNT; j++) {
		 memset(b1,0,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		}
		break;
	    case 4:
		for (j=0; j<BLOCK_COUNT; j++) {
		 mgl_memset(b1,0,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		}
		break;
	    case 5:
		for (j=0; j<BLOCK_COUNT; j++) {
		 mgl_memmove(b1,b2,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		 b2+=BLOCK_SIZE;
		}
		break;
	    case 6:
		for (j=0; j<BLOCK_COUNT; j++) {
		 mgl_memmove(b2,b1,BLOCK_SIZE);
		 b1+=BLOCK_SIZE;
		 b2+=BLOCK_SIZE;
		}
		break;
	    default:
		break;
	    }
	  }
	  e = millitime();
	  sleep(1); /* wait for flush cache */
	  if (e == s) {
		/* No measurable time elapsed; avoid dividing by zero. */
		printf("error\n");
	  } else {
	    /* Total bytes divided by elapsed milliseconds. */
	    dps = LOOP_COUNT * BLOCK_COUNT * BLOCK_SIZE /(int)(e - s);
	    printf("%s: %6d k byte/sec\n", names[type], dps);
	  }
	}
	return 0;
}
--- ここまで
--
					鈴木 康司 @NEC
					suz@hpc.bs1.fc.nec.co.jp
					TEL 0423-33-5381