00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00033 #include <unistd.h>
00034 #include <string.h>
00035
00036
00037 #if 1 // defined(C_ONLY) // bk010102 - dedicated?
/*
 * Com_Memcpy
 *
 * Copies `count` bytes from `src` to `dest` with the C library memcpy().
 * The source and destination regions must not overlap (memcpy's contract).
 *
 *   dest   destination buffer, at least `count` bytes
 *   src    source buffer, at least `count` bytes
 *   count  number of bytes to copy
 *
 * A zero-length request returns without calling memcpy(): the C standard
 * leaves memcpy() undefined when either pointer is invalid (for example
 * NULL) even if the length is 0. The inline-assembly variant of this
 * function in the #else branch performs the same count == 0 early exit,
 * so both variants now treat an empty copy as a no-op.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	if (count == 0) {
		return;
	}

	memcpy(dest, src, count);
}
00041
/*
 * Com_Memset
 *
 * Fills the first `count` bytes of `dest` with the byte value `val`
 * (converted to unsigned char by memset()).
 *
 *   dest   buffer to fill, at least `count` bytes
 *   val    fill value; only the low byte is used
 *   count  number of bytes to write
 *
 * A zero-length request returns without calling memset(): the C standard
 * leaves memset() undefined when the pointer is invalid (for example NULL)
 * even if the length is 0. The inline-assembly variant in the #else branch
 * stores nothing for count == 0, so both variants treat an empty fill as a
 * no-op.
 */
void Com_Memset (void* dest, const int val, const size_t count) {
	if (count == 0) {
		return;
	}

	memset(dest, val, count);
}
00045
00046 #else
00047
/*
 * Access hint passed to Com_Prefetch().
 * In this file Com_Prefetch() does nothing for PRE_WRITE and runs its
 * read-touch loop for PRE_READ and PRE_READ_WRITE.
 */
typedef enum {
	PRE_READ       = 0,
	PRE_WRITE      = 1,
	PRE_READ_WRITE = 2
} e_prefetch;

/* Forward declaration; the definition follows the copy/fill routines. */
void Com_Prefetch (const void *s,
                   const unsigned int bytes,
                   e_prefetch type);
00055
/*
 * _copyDWord
 *
 * Stores the 32-bit value `constant` into `count` consecutive dwords
 * starting at `dest`, using MMX 64-bit stores for the bulk of the work.
 *
 * This definition sits in the #else branch of the `#if 1` above, so the
 * preprocessor discards it in the file as shown.
 *
 * Register use (from the operand constraints at the end of the asm):
 *   eax = constant, ecx = count (in dwords), edx = dest.
 *
 * Steps performed by the asm text:
 *   - movd/punpckldq replicate eax into both halves of mm0.
 *   - If dest is not 8-byte aligned (testl $7), one dword is stored with
 *     movl, the count is decremented and dest advances by 4. This store
 *     happens before any test of the count.
 *   - Label 1: main loop, eight movq stores (64 bytes = 16 dwords) per
 *     iteration, run while (count & ~15) dwords remain.
 *   - Label 2 onward: the remaining 0..15 dwords are written in steps of
 *     8, 4, 2 (movq) and finally 1 (movl).
 *   - emms leaves MMX state before returning.
 *
 * NOTE(review): every line of the asm template ends in a backslash
 * continuation with no explicit newline escape, and the template contains
 * "//" annotations; confirm the assembler accepts the resulting single-line
 * text before enabling this branch.
 * NOTE(review): ecx and edx are declared as inputs only, yet the template
 * modifies them; GCC's extended-asm rules appear to forbid this - confirm.
 * NOTE(review): dest is presumably expected to be 4-byte aligned, since the
 * fix-up only advances by one dword - verify against callers.
 */
00056 void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
00057
00058
00059
00060 __asm__ __volatile__ (" \
00061 //mov eax,constant // eax = val \
00062 //mov edx,dest // dest \
00063 //mov ecx,count \
00064 movd %%eax, %%mm0 \
00065 punpckldq %%mm0, %%mm0 \
00066 \
00067 // ensure that destination is qword aligned \
00068 \
00069 testl $7, %%edx // qword padding?\
00070 jz 0f \
00071 movl %%eax, (%%edx) \
00072 decl %%ecx \
00073 addl $4, %%edx \
00074 \
00075 0: movl %%ecx, %%ebx \
00076 andl $0xfffffff0, %%ecx \
00077 jz 2f \
00078 jmp 1f \
00079 .align 16 \
00080 \
00081 // funny ordering here to avoid commands \
00082 // that cross 32-byte boundaries (the \
00083 // [edx+0] version has a special 3-byte opcode... \
00084 1: movq %%mm0, 8(%%edx) \
00085 movq %%mm0, 16(%%edx) \
00086 movq %%mm0, 24(%%edx) \
00087 movq %%mm0, 32(%%edx) \
00088 movq %%mm0, 40(%%edx) \
00089 movq %%mm0, 48(%%edx) \
00090 movq %%mm0, 56(%%edx) \
00091 movq %%mm0, (%%edx)\
00092 addl $64, %%edx \
00093 subl $16, %%ecx \
00094 jnz 1b \
00095 2: \
00096 movl %%ebx, %%ecx // ebx = cnt \
00097 andl $0xfffffff0, %%ecx // ecx = cnt&~15 \
00098 subl %%ecx, %%ebx \
00099 jz 6f \
00100 cmpl $8, %%ebx \
00101 jl 3f \
00102 \
00103 movq %%mm0, (%%edx) \
00104 movq %%mm0, 8(%%edx) \
00105 movq %%mm0, 16(%%edx) \
00106 movq %%mm0, 24(%%edx) \
00107 addl $32, %%edx \
00108 subl $8, %%ebx \
00109 jz 6f \
00110 \
00111 3: cmpl $4, %%ebx \
00112 jl 4f \
00113 \
00114 movq %%mm0, (%%edx) \
00115 movq %%mm0, 8(%%edx) \
00116 addl $16, %%edx \
00117 subl $4, %%ebx \
00118 \
00119 4: cmpl $2, %%ebx \
00120 jl 5f \
00121 movq %%mm0, (%%edx) \
00122 addl $8, %%edx \
00123 subl $2, %%ebx \
00124 \
00125 5: cmpl $1, %%ebx \
00126 jl 6f \
00127 movl %%eax, (%%edx) \
00128 6: \
00129 emms \
00130 "
00131 : : "a" (constant), "c" (count), "d" (dest)
00132 : "%ebx", "%edi", "%esi", "cc", "memory");
00133 }
00134
00135
00136
/*
 * Com_Memcpy (inline-assembly variant)
 *
 * Copies `count` bytes from `src` to `dest` with 32-bit integer moves,
 * after first calling Com_Prefetch(src, count, PRE_READ).
 *
 * This definition sits in the #else branch of the `#if 1` above, so the
 * preprocessor discards it in the file as shown; the memcpy() wrapper is
 * what gets compiled.
 *
 * Operands: src is a memory operand (%0) loaded into ebx, dest is in edx,
 * count is in ecx. edi and esi are saved and restored with pushl/popl.
 *
 * Steps performed by the asm text:
 *   - count == 0 jumps straight to the exit label 6.
 *   - count < 32 skips the bulk loop and goes to the tail at label 1.
 *   - Label 0: bulk loop. edi starts at (masked count - 32) and steps down
 *     by 32 while it stays >= 0; each pass copies 32 bytes at offset edi
 *     through eax/esi.
 *   - After the loop, src and dest advance by the masked count and ecx is
 *     reduced to count & 31.
 *   - Labels 1..5: the remainder is copied in pieces of 16, 8, 4, then
 *     2 (+1) or 1 byte(s).
 *
 * NOTE(review): the bulk mask literal is 0xfffffe00 (clears the low 9 bits)
 * while the adjacent annotation says "count&~31", which would be
 * 0xffffffe0. As written, for 32 <= count < 512 the masked value is 0, so
 * the first loop pass would address offset -32 from both pointers and the
 * pointers would not advance past the bulk region - confirm and correct
 * both occurrences before enabling this branch.
 * NOTE(review): %0 is read after two pushl instructions; if the compiler
 * addresses `src` relative to esp, that reference would be shifted -
 * confirm.
 * NOTE(review): edx and ecx are declared as inputs only but are modified,
 * and edi/esi are both push/pop-saved and listed as clobbers - confirm
 * against GCC's extended-asm rules.
 * NOTE(review): the asm template has backslash continuations without
 * explicit newline escapes; confirm the assembler accepts it.
 */
00137 void Com_Memcpy (void* dest, const void* src, const size_t count) {
00138 Com_Prefetch (src, count, PRE_READ);
00139 __asm__ __volatile__ (" \
00140 pushl %%edi \
00141 pushl %%esi \
00142 //mov ecx,count \
00143 cmpl $0, %%ecx // count = 0 check (just to be on the safe side) \
00144 je 6f \
00145 //mov edx,dest \
00146 movl %0, %%ebx \
00147 cmpl $32, %%ecx // padding only? \
00148 jl 1f \
00149 \
00150 movl %%ecx, %%edi \
00151 andl $0xfffffe00, %%edi // edi = count&~31 \
00152 subl $32, %%edi \
00153 \
00154 .align 16 \
00155 0: \
00156 movl (%%ebx, %%edi, 1), %%eax \
00157 movl 4(%%ebx, %%edi, 1), %%esi \
00158 movl %%eax, (%%edx, %%edi, 1) \
00159 movl %%esi, 4(%%edx, %%edi, 1) \
00160 movl 8(%%ebx, %%edi, 1), %%eax \
00161 movl 12(%%ebx, %%edi, 1), %%esi \
00162 movl %%eax, 8(%%edx, %%edi, 1) \
00163 movl %%esi, 12(%%edx, %%edi, 1) \
00164 movl 16(%%ebx, %%edi, 1), %%eax \
00165 movl 20(%%ebx, %%edi, 1), %%esi \
00166 movl %%eax, 16(%%edx, %%edi, 1) \
00167 movl %%esi, 20(%%edx, %%edi, 1) \
00168 movl 24(%%ebx, %%edi, 1), %%eax \
00169 movl 28(%%ebx, %%edi, 1), %%esi \
00170 movl %%eax, 24(%%edx, %%edi, 1) \
00171 movl %%esi, 28(%%edx, %%edi, 1) \
00172 subl $32, %%edi \
00173 jge 0b \
00174 \
00175 movl %%ecx, %%edi \
00176 andl $0xfffffe00, %%edi \
00177 addl %%edi, %%ebx // increase src pointer \
00178 addl %%edi, %%edx // increase dst pointer \
00179 andl $31, %%ecx // new count \
00180 jz 6f // if count = 0, get outta here \
00181 \
00182 1: \
00183 cmpl $16, %%ecx \
00184 jl 2f \
00185 movl (%%ebx), %%eax \
00186 movl %%eax, (%%edx) \
00187 movl 4(%%ebx), %%eax \
00188 movl %%eax, 4(%%edx) \
00189 movl 8(%%ebx), %%eax \
00190 movl %%eax, 8(%%edx) \
00191 movl 12(%%ebx), %%eax \
00192 movl %%eax, 12(%%edx) \
00193 subl $16, %%ecx \
00194 addl $16, %%ebx \
00195 addl $16, %%edx \
00196 2: \
00197 cmpl $8, %%ecx \
00198 jl 3f \
00199 movl (%%ebx), %%eax \
00200 movl %%eax, (%%edx) \
00201 movl 4(%%ebx), %%eax \
00202 subl $8, %%ecx \
00203 movl %%eax, 4(%%edx) \
00204 addl $8, %%ebx \
00205 addl $8, %%edx \
00206 3: \
00207 cmpl $4, %%ecx \
00208 jl 4f \
00209 movl (%%ebx), %%eax // here 4-7 bytes \
00210 addl $4, %%ebx \
00211 subl $4, %%ecx \
00212 movl %%eax, (%%edx) \
00213 addl $4, %%edx \
00214 4: // 0-3 remaining bytes \
00215 cmpl $2, %%ecx \
00216 jl 5f \
00217 movw (%%ebx), %%ax // two bytes \
00218 cmpl $3, %%ecx // less than 3? \
00219 movw %%ax, (%%edx) \
00220 jl 6f \
00221 movb 2(%%ebx), %%al // last byte \
00222 movb %%al, 2(%%edx) \
00223 jmp 6f \
00224 5: \
00225 cmpl $1, %%ecx \
00226 jl 6f \
00227 movb (%%ebx), %%al \
00228 movb %%al, (%%edx) \
00229 6: \
00230 popl %%esi \
00231 popl %%edi \
00232 "
00233 : : "m" (src), "d" (dest), "c" (count)
00234 : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
00235 }
00236
/*
 * Com_Memset (inline-assembly variant)
 *
 * Fills `count` bytes at `dest` with a byte pattern derived from `val`.
 *
 * This definition sits in the #else branch of the `#if 1` above, so the
 * preprocessor discards it in the file as shown; the memset() wrapper is
 * what gets compiled.
 *
 * Two paths:
 *   - count < 8: a single asm block replicates the low byte of eax into all
 *     four bytes of eax, then stores one dword (if >= 4 bytes remain), one
 *     word (if >= 2 remain) and one byte (if any remain), and returns.
 *   - otherwise: the pattern is built in C as fillval, _copyDWord() writes
 *     count/4 dwords, and a second asm block writes the last count & 3
 *     bytes (word, word + byte, or single byte).
 *
 * NOTE(review): in the large path fillval is assigned from val without
 * masking to 8 bits, so bits of val above the low byte would flow into the
 * pattern; the small path only replicates the low byte. Presumably callers
 * pass 0..255 - verify.
 * NOTE(review): the tail block computes its offset with the mask
 * 0xffffff00 (clears the low 8 bits of count) and adds it to dest, whereas
 * _copyDWord() has covered count/4 dwords; a mask of 0xfffffffc would match
 * that - confirm and correct before enabling this branch.
 * NOTE(review): input-only registers (eax/ecx/edx) are modified inside both
 * templates, and the templates use backslash continuations without explicit
 * newline escapes - confirm against GCC's extended-asm rules.
 */
00237 void Com_Memset (void* dest, const int val, const size_t count)
00238 {
00239 unsigned int fillval;
00240
/* Small fills: handled entirely by the asm block below, then return. */
00241 if (count < 8)
00242 {
00243 __asm__ __volatile__ (" \
00244 //mov edx,dest \
00245 //mov eax, val \
00246 movb %%al, %%ah \
00247 movl %%eax, %%ebx \
00248 andl $0xffff, %%ebx \
00249 shll $16, %%eax \
00250 addl %%ebx, %%eax // eax now contains pattern \
00251 //mov ecx,count \
00252 cmpl $4, %%ecx \
00253 jl 0f \
00254 movl %%eax, (%%edx) // copy first dword \
00255 addl $4, %%edx \
00256 subl $4, %%ecx \
00257 0: cmpl $2, %%ecx \
00258 jl 1f \
00259 movw %%ax, (%%edx) // copy 2 bytes \
00260 addl $2, %%edx \
00261 subl $2, %%ecx \
00262 1: cmpl $0, %%ecx \
00263 je 2f \
00264 movb %%al, (%%edx) // copy single byte \
00265 2: \
00266 "
00267 : : "d" (dest), "a" (val), "c" (count)
00268 : "%ebx", "%edi", "%esi", "cc", "memory");
00269
00270 return;
00271 }
00272
/* Build the 32-bit fill pattern by OR-ing shifted copies of val. */
00273 fillval = val;
00274
00275 fillval = fillval|(fillval<<8);
00276 fillval = fillval|(fillval<<16);
00277
/* Bulk: whole dwords; the asm that follows handles the 0-3 byte tail. */
00278 _copyDWord ((unsigned int*)(dest),fillval, count/4);
00279
00280 __asm__ __volatile__ (" // padding of 0-3 bytes \
00281 //mov ecx,count \
00282 movl %%ecx, %%eax \
00283 andl $3, %%ecx \
00284 jz 1f \
00285 andl $0xffffff00, %%eax \
00286 //mov ebx,dest \
00287 addl %%eax, %%edx \
00288 movl %0, %%eax \
00289 cmpl $2, %%ecx \
00290 jl 0f \
00291 movw %%ax, (%%edx) \
00292 cmpl $2, %%ecx \
00293 je 1f \
00294 movb %%al, 2(%%edx) \
00295 jmp 1f \
00296 0: \
00297 cmpl $0, %%ecx\
00298 je 1f\
00299 movb %%al, (%%edx)\
00300 1: \
00301 "
00302 : : "m" (fillval), "c" (count), "d" (dest)
00303 : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
00304 }
00305
/*
 * Com_Prefetch
 *
 * Touches the memory at `s` by reading one byte every 32 bytes, for up to
 * 4096 bytes, so that the region is pulled in before a following copy.
 *
 * This definition sits in the #else branch of the `#if 1` above, so the
 * preprocessor discards it in the file as shown.
 *
 *   s      start of the region (placed in edx)
 *   bytes  length of the region (placed in ecx)
 *   type   PRE_WRITE does nothing; PRE_READ and PRE_READ_WRITE run the
 *          touch loop. Any other value falls out of the switch untouched
 *          (there is no default case).
 *
 * Steps performed by the asm text:
 *   - bytes above 4096 are replaced by 4096.
 *   - (bytes + 31) >> 5 gives the number of 32-byte lines; zero skips the
 *     loop.
 *   - Label 1: testb reads one byte at edx, edx advances by 32, repeated
 *     once per line.
 *
 * NOTE(review): the clamp uses a signed comparison (jle), so a `bytes`
 * value with the top bit set would not be clamped - confirm whether that
 * can occur.
 * NOTE(review): the 32-byte stride presumably matches the cache-line size
 * of the CPUs this was written for - verify.
 * NOTE(review): ecx and edx are declared as inputs only but are modified,
 * and the template uses backslash continuations without explicit newline
 * escapes - confirm against GCC's extended-asm rules.
 */
00306 void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
00307 {
00308
00309
00310
00311
00312 switch (type)
00313 {
00314 case PRE_WRITE : break;
00315 case PRE_READ:
00316 case PRE_READ_WRITE:
00317
00318 __asm__ __volatile__ ("\
00319 //mov ebx,s\
00320 //mov ecx,bytes\
00321 cmpl $4096, %%ecx // clamp to 4kB\
00322 jle 0f\
00323 movl $4096, %%ecx\
00324 0:\
00325 addl $0x1f, %%ecx\
00326 shrl $5, %%ecx // number of cache lines\
00327 jz 2f\
00328 jmp 1f\
00329 \
00330 .align 16\
00331 1: testb %%al, (%%edx)\
00332 addl $32, %%edx\
00333 decl %%ecx\
00334 jnz 1b\
00335 2:\
00336 "
00337 : : "d" (s), "c" (bytes)
00338 : "%eax", "%ebx", "%edi", "%esi", "memory", "cc");
00339
00340 break;
00341 }
00342 }
00343
00344 #endif