Main Page | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

linux_common.c

Go to the documentation of this file.
00001 /*
00002 ===========================================================================
00003 Copyright (C) 1999-2005 Id Software, Inc.
00004 
00005 This file is part of Quake III Arena source code.
00006 
00007 Quake III Arena source code is free software; you can redistribute it
00008 and/or modify it under the terms of the GNU General Public License as
00009 published by the Free Software Foundation; either version 2 of the License,
00010 or (at your option) any later version.
00011 
00012 Quake III Arena source code is distributed in the hope that it will be
00013 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 GNU General Public License for more details.
00016 
00017 You should have received a copy of the GNU General Public License
00018 along with Quake III Arena source code; if not, write to the Free Software
00019 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00020 ===========================================================================
00021 */
00033 #include <unistd.h>   // AH - for size_t
00034 #include <string.h>
00035 
00036 // bk001207 - we need something under Linux, too. Mac?
00037 #if 1 // defined(C_ONLY) // bk010102 - dedicated?
/*
 * Com_Memcpy
 *
 * Portable copy routine: forwards straight to the C library's memcpy().
 *
 *   dest   destination buffer, at least `count` bytes
 *   src    source buffer, at least `count` bytes
 *   count  number of bytes to copy
 *
 * As with memcpy(), the two regions are expected not to overlap.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count)
{
    (void) memcpy( dest, src, count );
}
00041 
/*
 * Com_Memset
 *
 * Portable fill routine: forwards straight to the C library's memset().
 *
 *   dest   buffer to fill, at least `count` bytes
 *   val    fill value (memset() stores it converted to unsigned char)
 *   count  number of bytes to write
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
    (void) memset( dest, val, count );
}
00045 
00046 #else
00047 
/*
 * e_prefetch
 *
 * Tells Com_Prefetch how the caller intends to use the buffer.
 */
typedef enum
{
    PRE_READ       = 0,   // buffer will only be read
    PRE_WRITE      = 1,   // buffer will only be written
    PRE_READ_WRITE = 2    // buffer will be both read and written
} e_prefetch;

// forward declaration -- Com_Memcpy calls this before its definition appears
void Com_Prefetch( const void *s, const unsigned int bytes, e_prefetch type );
00055 
/*
 * _copyDWord
 *
 * Stores the 32-bit value `constant` into `count` consecutive dwords
 * starting at `dest`, using 64-bit MMX stores for the bulk of the work.
 *
 * Register bindings (taken from the asm constraints below):
 *   eax = constant, ecx = count, edx = dest.
 * ebx is used as scratch for the dword count left over after the
 * 64-byte blocks.
 *
 * Flow of the asm template:
 *   - movd/punpckldq replicate eax into both halves of mm0
 *   - if (dest & 7) != 0, one dword is stored and dest advances by 4, so
 *     that a 4-byte aligned destination becomes 8-byte aligned
 *   - label 1: main loop, 64 bytes (16 dwords) per iteration
 *   - labels 2-5: the remaining 0..15 dwords are written as chunks of
 *     8, 4, 2 and 1 dwords
 *   - label 6: emms leaves MMX state before returning
 *
 * NOTE(review): the alignment store is done without checking count; with
 * count == 0 and a destination that is not 8-byte aligned it would write
 * one dword and wrap ecx.  The only caller in this file, Com_Memset, calls
 * it with count/4 after rejecting count < 8.
 * NOTE(review): ecx and edx are changed by the template although they are
 * declared as input-only operands -- confirm against the GCC extended asm
 * rules before relying on this code.
 * NOTE(review): the template uses backslash-newline continuations inside
 * the string literal; those are spliced away by the preprocessor, so the
 * text handed to the assembler has no line breaks between instructions
 * and still contains the `//` remarks.  Verify that it assembles before
 * enabling this branch.
 */
00056 void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
00057     // MMX version not used on standard Pentium MMX
00058     // because the dword version is faster (with
00059     // proper destination prefetching)
00060         __asm__ __volatile__ (" \
00061             //mov           eax,constant        // eax = val \
00062             //mov           edx,dest            // dest \
00063             //mov           ecx,count \
00064             movd        %%eax, %%mm0 \
00065             punpckldq   %%mm0, %%mm0 \
00066 \
00067             // ensure that destination is qword aligned \
00068 \
00069             testl       $7, %%edx               // qword padding?\
00070             jz      0f  \
00071             movl        %%eax, (%%edx) \
00072             decl        %%ecx \
00073             addl        $4, %%edx \
00074 \
00075 0:          movl        %%ecx, %%ebx                \
00076             andl        $0xfffffff0, %%ecx  \
00077             jz      2f \
00078             jmp     1f \
00079             .align      16 \
00080 \
00081             // funny ordering here to avoid commands \
00082             // that cross 32-byte boundaries (the \
00083             // [edx+0] version has a special 3-byte opcode... \
00084 1:          movq        %%mm0, 8(%%edx) \
00085             movq        %%mm0, 16(%%edx) \
00086             movq        %%mm0, 24(%%edx) \
00087             movq        %%mm0, 32(%%edx) \
00088             movq        %%mm0, 40(%%edx) \
00089             movq        %%mm0, 48(%%edx) \
00090             movq        %%mm0, 56(%%edx) \
00091             movq        %%mm0, (%%edx)\
00092             addl        $64, %%edx \
00093             subl        $16, %%ecx \
00094             jnz     1b \
00095 2:  \
00096             movl        %%ebx, %%ecx                // ebx = cnt \
00097             andl        $0xfffffff0, %%ecx              // ecx = cnt&~15 \
00098             subl        %%ecx, %%ebx \
00099             jz      6f \
00100             cmpl        $8, %%ebx \
00101             jl      3f \
00102 \
00103             movq        %%mm0, (%%edx) \
00104             movq        %%mm0, 8(%%edx) \
00105             movq        %%mm0, 16(%%edx) \
00106             movq        %%mm0, 24(%%edx) \
00107             addl        $32, %%edx \
00108             subl        $8, %%ebx \
00109             jz      6f \
00110 \
00111 3:          cmpl        $4, %%ebx \
00112             jl      4f \
00113             \
00114             movq        %%mm0, (%%edx) \
00115             movq        %%mm0, 8(%%edx) \
00116             addl        $16, %%edx \
00117             subl        $4, %%ebx \
00118 \
00119 4:          cmpl        $2, %%ebx \
00120             jl      5f \
00121             movq        %%mm0, (%%edx) \
00122             addl        $8, %%edx \
00123             subl        $2, %%ebx \
00124 \
00125 5:          cmpl        $1, %%ebx \
00126             jl      6f \
00127             movl        %%eax, (%%edx) \
00128 6: \
00129             emms \
00130     "
00131     : : "a" (constant), "c" (count), "d" (dest)
00132     : "%ebx", "%edi", "%esi", "cc", "memory");
00133 }
00134 
00135 // optimized memory copy routine that handles all alignment
00136 // cases and block sizes efficiently
00137 void Com_Memcpy (void* dest, const void* src, const size_t count) {
00138     Com_Prefetch (src, count, PRE_READ);
00139     __asm__ __volatile__ (" \
00140         pushl       %%edi \
00141         pushl       %%esi \
00142         //mov       ecx,count \
00143         cmpl        $0, %%ecx                       // count = 0 check (just to be on the safe side) \
00144         je      6f \
00145         //mov       edx,dest \
00146         movl        %0, %%ebx \
00147         cmpl        $32, %%ecx                      // padding only? \
00148         jl      1f \
00149 \
00150         movl        %%ecx, %%edi                    \
00151         andl        $0xfffffe00, %%edi                  // edi = count&~31 \
00152         subl        $32, %%edi \
00153 \
00154         .align 16 \
00155 0: \
00156         movl        (%%ebx, %%edi, 1), %%eax \
00157         movl        4(%%ebx, %%edi, 1), %%esi \
00158         movl        %%eax, (%%edx, %%edi, 1) \
00159         movl        %%esi, 4(%%edx, %%edi, 1) \
00160         movl        8(%%ebx, %%edi, 1), %%eax \
00161         movl        12(%%ebx, %%edi, 1), %%esi \
00162         movl        %%eax, 8(%%edx, %%edi, 1) \
00163         movl        %%esi, 12(%%edx, %%edi, 1) \
00164         movl        16(%%ebx, %%edi, 1), %%eax \
00165         movl        20(%%ebx, %%edi, 1), %%esi \
00166         movl        %%eax, 16(%%edx, %%edi, 1) \
00167         movl        %%esi, 20(%%edx, %%edi, 1) \
00168         movl        24(%%ebx, %%edi, 1), %%eax \
00169         movl        28(%%ebx, %%edi, 1), %%esi \
00170         movl        %%eax, 24(%%edx, %%edi, 1) \
00171         movl        %%esi, 28(%%edx, %%edi, 1) \
00172         subl        $32, %%edi \
00173         jge     0b \
00174         \
00175         movl        %%ecx, %%edi \
00176         andl        $0xfffffe00, %%edi \
00177         addl        %%edi, %%ebx                    // increase src pointer \
00178         addl        %%edi, %%edx                    // increase dst pointer \
00179         andl        $31, %%ecx                  // new count \
00180         jz      6f                  // if count = 0, get outta here \
00181 \
00182 1: \
00183         cmpl        $16, %%ecx \
00184         jl      2f \
00185         movl        (%%ebx), %%eax \
00186         movl        %%eax, (%%edx) \
00187         movl        4(%%ebx), %%eax \
00188         movl        %%eax, 4(%%edx) \
00189         movl        8(%%ebx), %%eax \
00190         movl        %%eax, 8(%%edx) \
00191         movl        12(%%ebx), %%eax \
00192         movl        %%eax, 12(%%edx) \
00193         subl        $16, %%ecx \
00194         addl        $16, %%ebx \
00195         addl        $16, %%edx \
00196 2: \
00197         cmpl        $8, %%ecx \
00198         jl      3f \
00199         movl        (%%ebx), %%eax \
00200         movl        %%eax, (%%edx) \
00201         movl        4(%%ebx), %%eax \
00202         subl        $8, %%ecx \
00203         movl        %%eax, 4(%%edx) \
00204         addl        $8, %%ebx \
00205         addl        $8, %%edx \
00206 3: \
00207         cmpl        $4, %%ecx \
00208         jl      4f \
00209         movl        (%%ebx), %%eax  // here 4-7 bytes \
00210         addl        $4, %%ebx \
00211         subl        $4, %%ecx \
00212         movl        %%eax, (%%edx) \
00213         addl        $4, %%edx \
00214 4:                          // 0-3 remaining bytes \
00215         cmpl        $2, %%ecx \
00216         jl      5f \
00217         movw        (%%ebx), %%ax   // two bytes \
00218         cmpl        $3, %%ecx               // less than 3? \
00219         movw        %%ax, (%%edx) \
00220         jl      6f \
00221         movb        2(%%ebx), %%al  // last byte \
00222         movb        %%al, 2(%%edx) \
00223         jmp     6f \
00224 5: \
00225         cmpl        $1, %%ecx \
00226         jl      6f \
00227         movb        (%%ebx), %%al \
00228         movb        %%al, (%%edx) \
00229 6: \
00230         popl        %%esi \
00231         popl        %%edi \
00232     "
00233     : : "m" (src), "d" (dest), "c" (count)
00234     : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
00235 }
00236 
// Fills `count` bytes at `dest` with the byte value `val`
// (i386 inline-asm variant).
//
//   dest   buffer to fill, at least `count` bytes
//   val    fill value; only its low 8 bits are used, as with memset()
//   count  number of bytes to write
//
// count < 8:  a single asm block builds the 32-bit pattern from al and
//             writes it as one dword, one word and one byte as needed.
// count >= 8: the dword part (count/4 dwords) is written by _copyDWord,
//             then a second asm block writes the trailing count & 3
//             bytes at dest + (count & ~3).
//
// Fixes:
//   - the offset of the trailing bytes was computed with the mask
//     $0xffffff00 instead of count & ~3 ($0xfffffffc), so e.g. count = 9
//     wrote the tail byte at dest+0 instead of dest+8.
//   - fillval is now reduced to a byte before it is replicated; the
//     small-count path only ever uses al, and memset() converts the value
//     to unsigned char, so a val outside 0..255 previously produced a
//     different pattern on the large path.
//
// NOTE(review): eax, ecx and edx are changed by the templates although
// they are declared as input-only operands -- confirm against the GCC
// extended asm rules before relying on this code.
// NOTE(review): the templates use backslash-newline continuations inside
// the string literal; those are spliced away by the preprocessor, so the
// text handed to the assembler has no line breaks between instructions
// and still contains the `//` remarks.  Verify that it assembles before
// enabling this branch.
void Com_Memset (void* dest, const int val, const size_t count)
{
    unsigned int fillval;

    if (count < 8)
    {
        __asm__ __volatile__ (" \
            //mov       edx,dest \
            //mov       eax, val \
            movb        %%al, %%ah \
            movl        %%eax, %%ebx \
            andl        $0xffff, %%ebx \
            shll        $16, %%eax \
            addl        %%ebx, %%eax    // eax now contains pattern \
            //mov       ecx,count \
            cmpl        $4, %%ecx \
            jl      0f \
            movl        %%eax, (%%edx)  // copy first dword \
            addl        $4, %%edx \
            subl        $4, %%ecx \
    0:      cmpl        $2, %%ecx \
            jl      1f \
            movw        %%ax, (%%edx)   // copy 2 bytes \
            addl        $2, %%edx \
            subl        $2, %%ecx \
    1:      cmpl        $0, %%ecx \
            je      2f \
            movb        %%al, (%%edx)   // copy single byte \
    2:       \
        "
        : : "d" (dest), "a" (val), "c" (count)
        : "%ebx", "%edi", "%esi", "cc", "memory");
        
        return;
    }

    // keep only the byte that is actually stored, then replicate it
    fillval = (unsigned char)val;
    
    fillval = fillval|(fillval<<8);
    fillval = fillval|(fillval<<16);        // fill dword with 8-bit pattern

    _copyDWord ((unsigned int*)(dest),fillval, count/4);
    
    __asm__ __volatile__ ("             // padding of 0-3 bytes \
        //mov       ecx,count \
        movl        %%ecx, %%eax \
        andl        $3, %%ecx \
        jz      1f \
        andl        $0xfffffffc, %%eax \
        //mov       ebx,dest \
        addl        %%eax, %%edx \
        movl        %0, %%eax \
        cmpl        $2, %%ecx \
        jl      0f \
        movw        %%ax, (%%edx) \
        cmpl        $2, %%ecx \
        je      1f                  \
        movb        %%al, 2(%%edx)      \
        jmp     1f \
0:      \
        cmpl        $0, %%ecx\
        je      1f\
        movb        %%al, (%%edx)\
1:  \
    "
    : : "m" (fillval), "c" (count), "d" (dest)
    : "%eax", "%ebx", "%edi", "%esi", "cc", "memory");  
}
00305 
/*
 * Com_Prefetch
 *
 * Touches the memory at `s` so that it is pulled into the cache before the
 * caller works on it.
 *
 *   s      start of the buffer
 *   bytes  size of the buffer; at most the first 4096 bytes are touched
 *   type   PRE_WRITE returns without touching anything; PRE_READ and
 *          PRE_READ_WRITE run the touch loop below
 *
 * Register bindings (taken from the asm constraints): edx = s,
 * ecx = bytes.  The `//mov ebx,s` remark inside the template does not
 * match the "d" constraint; the loop itself addresses through edx.
 *
 * The loop count is (min(bytes, 4096) + 31) >> 5, i.e. one byte-wide
 * `testb` read every 32 bytes; bytes == 0 performs no read at all.
 *
 * NOTE(review): the clamp uses `jle`, a signed comparison, so a `bytes`
 * value with the top bit set is not clamped to 4096 -- confirm whether
 * callers can pass such sizes.
 * NOTE(review): ecx and edx are changed by the template although they are
 * declared as input-only operands -- confirm against the GCC extended asm
 * rules before relying on this code.
 * NOTE(review): the template uses backslash-newline continuations inside
 * the string literal; those are spliced away by the preprocessor, so the
 * text handed to the assembler has no line breaks between instructions
 * and still contains the `//` remarks.  Verify that it assembles before
 * enabling this branch.
 */
00306 void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
00307 {
00308     // write buffer prefetching is performed only if
00309     // the processor benefits from it. Read and read/write
00310     // prefetching is always performed.
00311 
00312     switch (type)
00313     {
00314         case PRE_WRITE : break;
00315         case PRE_READ:
00316         case PRE_READ_WRITE:
00317 
00318         __asm__ __volatile__ ("\
00319             //mov       ebx,s\
00320             //mov       ecx,bytes\
00321             cmpl        $4096, %%ecx                // clamp to 4kB\
00322             jle     0f\
00323             movl        $4096, %%ecx\
00324     0:\
00325             addl        $0x1f, %%ecx\
00326             shrl        $5, %%ecx                   // number of cache lines\
00327             jz      2f\
00328             jmp     1f\
00329 \
00330             .align 16\
00331     1:      testb       %%al, (%%edx)\
00332             addl        $32, %%edx\
00333             decl        %%ecx\
00334             jnz     1b\
00335     2:\
00336         "
00337         : : "d" (s), "c" (bytes)
00338         : "%eax", "%ebx", "%edi", "%esi", "memory", "cc");
00339         
00340         break;
00341     }
00342 }
00343 
00344 #endif

Generated on Thu Aug 25 12:38:02 2005 for Quake III Arena by  doxygen 1.3.9.1