#include <stddef.h>
#include <stdint.h>

/* True when pointer p lies on a sizeof(long) boundary. */
#define ALIGNED(p) (!((uintptr_t)(p) & (sizeof(long) - 1)))
/* Bytes moved by one unrolled iteration (four words). */
#define QUADBLOCKSIZE (sizeof(long) << 2)
/* Bytes moved by one single-word iteration. */
#define BLOCKSIZE (sizeof(long))

/*
 * Copy sz bytes from src to dest.
 *
 * When both pointers are long-aligned and at least one word is requested,
 * the bulk of the data is moved a word at a time (four words per iteration
 * while enough remains, then one word per iteration); whatever is left is
 * copied byte by byte.  Otherwise the whole region is copied byte by byte.
 *
 * dest: start of the destination buffer.
 * src:  start of the source buffer.
 * sz:   number of bytes to copy; 0 copies nothing.
 *
 * Returns the original dest pointer.
 *
 * NOTE(review): the word path reads and writes the buffers through long
 * lvalues regardless of the objects' declared types; confirm the build
 * tolerates that (e.g. -fno-strict-aliasing) if it matters for this target.
 */
void *memcpy(void *dest, const void *src, size_t sz)
{
	/* Cursors for the byte-level copy; dest itself is kept untouched so
	 * it can be returned unchanged. */
	unsigned char *out = dest;
	const unsigned char *in = src;

	/* Only try word-writing if the pointers are aligned and
	 * the buffer is big enough */
	if (sz >= BLOCKSIZE && ALIGNED(dest) && ALIGNED(src)) {
		long *dest_word = dest;
		const long *src_word = src;

		/* First try copying 4x blocks per iteration */
		while (sz >= QUADBLOCKSIZE) {
			*dest_word++ = *src_word++;
			*dest_word++ = *src_word++;
			*dest_word++ = *src_word++;
			*dest_word++ = *src_word++;
			sz -= QUADBLOCKSIZE;
		}

		/* Then try copying 1x words per iteration */
		while (sz >= BLOCKSIZE) {
			*dest_word++ = *src_word++;
			sz -= BLOCKSIZE;
		}

		/* Hand the remaining tail over to the byte-level copy */
		out = (unsigned char *)dest_word;
		in = (const unsigned char *)src_word;
	}

	while (sz-- > 0) {
		*out++ = *in++;
	}

	return dest;
}