Index: oldkernel/linux/drivers/block/xor.c diff -u linux/drivers/block/xor.c:1.1 linux/drivers/block/xor.c:1.2 --- linux/drivers/block/xor.c:1.1 Thu Jun 1 14:53:31 2000 +++ linux/drivers/block/xor.c Thu Jun 1 15:06:21 2000 @@ -22,6 +22,10 @@ #include #include #endif +#ifdef __i386__ +#include +#include +#endif /* * we use the 'XOR function template' to register multiple xor @@ -66,7 +70,7 @@ #ifdef __i386__ -#ifdef CONFIG_X86_XMM +#ifdef CONFIG_X86_CPU_OPTIMIZATIONS /* * Cache avoiding checksumming functions utilizing KNI instructions * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) @@ -74,21 +78,13 @@ XORBLOCK_TEMPLATE(pIII_kni) { - char xmm_save[16*4]; - int cr0; - int lines = (bh_ptr[0]->b_size>>8); - - __asm__ __volatile__ ( - "movl %%cr0,%0 ;\n\t" - "clts ;\n\t" - "movups %%xmm0,(%1) ;\n\t" - "movups %%xmm1,0x10(%1) ;\n\t" - "movups %%xmm2,0x20(%1) ;\n\t" - "movups %%xmm3,0x30(%1) ;\n\t" - : "=r" (cr0) - : "r" (xmm_save) - : "memory" ); + char xmm_space[64]; + int lines = (bh_ptr[0]->b_size>>8); + int recursive = 0; + unsigned long flags; + kernel_take_fpu_kni(recursive,&xmm_space[0],NULL,flags); + #define OFFS(x) "8*("#x"*2)" #define PF0(x) \ " prefetcht0 "OFFS(x)"(%1) ;\n" @@ -157,7 +153,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data) : "memory" ); @@ -207,7 +203,7 @@ " decl %0 ;\n" " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data) @@ -266,7 +262,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -333,7 +329,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -343,16 +339,7 @@ break; } - __asm__ __volatile__ ( - "sfence ;\n\t" - "movups (%1),%%xmm0 ;\n\t" - "movups 0x10(%1),%%xmm1 ;\n\t" - "movups 0x20(%1),%%xmm2 ;\n\t" - "movups 0x30(%1),%%xmm3 ;\n\t" - "movl %0,%%cr0 ;\n\t" - : - : "r" (cr0), "r" (xmm_save) - : "memory" ); + kernel_release_fpu_kni(recursive,&xmm_space[0],flags); } #undef OFFS @@ -371,7 +358,7 @@ #undef XO5 #undef BLOCK -#endif /* CONFIG_X86_XMM */ +#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */ /* * high-speed RAID5 checksumming functions utilizing MMX instructions @@ -379,13 +366,12 @@ */ XORBLOCK_TEMPLATE(pII_mmx) { - char fpu_save[108]; int lines = (bh_ptr[0]->b_size>>7); - - if (!(current->flags & PF_USEDFPU)) - __asm__ __volatile__ ( " clts;\n"); + char mmx_space[32]; + int recursive = 0; + unsigned long flags; - __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); + kernel_take_fpu_mmx(recursive,&mmx_space[0],NULL,flags); #define LD(x,y) \ " movq 8*("#x")(%1), %%mm"#y" ;\n" @@ -431,7 +417,7 @@ " decl %0 ;\n" " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data) : "memory"); @@ -471,7 +457,7 @@ " decl %0 ;\n" " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data) @@ -517,7 +503,7 @@ " decl %0 ;\n" " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -569,7 +555,7 @@ " decl %0 ;\n" " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -579,10 +565,7 @@ break; } - __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); - - if (!(current->flags & PF_USEDFPU)) - stts(); + kernel_release_fpu_mmx(recursive,&mmx_space[0],flags); } #undef LD @@ -595,13 +578,12 @@ XORBLOCK_TEMPLATE(p5_mmx) { - char fpu_save[108]; int lines = (bh_ptr[0]->b_size>>6); - - if (!(current->flags & PF_USEDFPU)) - __asm__ __volatile__ ( " clts;\n"); + char mmx_space[32]; + int recursive = 0; + unsigned long flags; - __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); + kernel_take_fpu_mmx(recursive,&mmx_space[0],NULL,flags); switch(count) { case 2: @@ -618,21 +600,21 @@ " movq 24(%1), %%mm3 ;\n" " movq %%mm1, 8(%1) ;\n" " pxor 16(%2), %%mm2 ;\n" - " movq 32(%1), %%mm4 ;\n" + " movq 32(%1), %%mm0 ;\n" " movq %%mm2, 16(%1) ;\n" " pxor 24(%2), %%mm3 ;\n" - " movq 40(%1), %%mm5 ;\n" + " movq 40(%1), %%mm1 ;\n" " movq %%mm3, 24(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" + " pxor 32(%2), %%mm0 ;\n" + " movq 48(%1), %%mm2 ;\n" + " movq %%mm0, 32(%1) ;\n" + " pxor 40(%2), %%mm1 ;\n" + " movq 56(%1), %%mm3 ;\n" + " movq %%mm1, 40(%1) ;\n" + " pxor 48(%2), %%mm2 ;\n" + " pxor 56(%2), %%mm3 ;\n" + " movq %%mm2, 48(%1) ;\n" + " movq %%mm3, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" @@ -640,7 +622,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data) : "memory" ); @@ -662,26 +644,26 @@ " pxor 16(%3), %%mm2 ;\n" " movq 24(%1), %%mm3 ;\n" " movq %%mm1, 8(%1) ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" + " movq 32(%1), %%mm0 ;\n" + " movq 40(%1), %%mm1 ;\n" " pxor 24(%2), %%mm3 ;\n" " movq %%mm2, 16(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" + " pxor 32(%2), %%mm0 ;\n" " pxor 24(%3), %%mm3 ;\n" - " pxor 40(%2), %%mm5 ;\n" + " pxor 40(%2), %%mm1 ;\n" " movq %%mm3, 24(%1) ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" + " pxor 32(%3), %%mm0 ;\n" + " pxor 40(%3), %%mm1 ;\n" + " movq 48(%1), %%mm2 ;\n" + " movq %%mm0, 32(%1) ;\n" + " movq 56(%1), %%mm3 ;\n" + " pxor 48(%2), %%mm2 ;\n" + " movq %%mm1, 40(%1) ;\n" + " pxor 56(%2), %%mm3 ;\n" + " pxor 48(%3), %%mm2 ;\n" + " pxor 56(%3), %%mm3 ;\n" + " movq %%mm2, 48(%1) ;\n" + " movq %%mm3, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" @@ -690,7 +672,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data) @@ -714,33 +696,33 @@ " pxor 16(%3), %%mm2 ;\n" " pxor 8(%4), %%mm1 ;\n" " movq %%mm0, (%1) ;\n" - " movq 32(%1), %%mm4 ;\n" + " movq 32(%1), %%mm0 ;\n" " pxor 24(%2), %%mm3 ;\n" " pxor 16(%4), %%mm2 ;\n" " movq %%mm1, 8(%1) ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm1 ;\n" + " pxor 32(%2), %%mm0 ;\n" " pxor 24(%3), %%mm3 ;\n" " movq %%mm2, 16(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm1 ;\n" + " pxor 32(%3), %%mm0 ;\n" " pxor 24(%4), %%mm3 ;\n" " movq %%mm3, 24(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq 48(%1), %%mm6 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" + " movq 56(%1), %%mm3 ;\n" + " movq 48(%1), %%mm2 ;\n" + " pxor 40(%3), %%mm1 ;\n" + " pxor 32(%4), %%mm0 ;\n" + " pxor 48(%2), %%mm2 ;\n" + " movq %%mm0, 32(%1) ;\n" + " pxor 56(%2), %%mm3 ;\n" + " pxor 40(%4), %%mm1 ;\n" + " pxor 48(%3), %%mm2 ;\n" + " pxor 56(%3), %%mm3 ;\n" + " movq %%mm1, 40(%1) ;\n" + " pxor 48(%4), %%mm2 ;\n" + " pxor 56(%4), %%mm3 ;\n" + " movq %%mm2, 48(%1) ;\n" + " movq %%mm3, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" @@ -750,7 +732,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -782,34 +764,34 @@ " movq %%mm1, 8(%1) ;\n" " pxor 16(%5), %%mm2 ;\n" " pxor 24(%3), %%mm3 ;\n" - " movq 32(%1), %%mm4 ;\n" + " movq 32(%1), %%mm0 ;\n" " movq %%mm2, 16(%1) ;\n" " pxor 24(%4), %%mm3 ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm0 ;\n" + " movq 40(%1), %%mm1 ;\n" " pxor 24(%5), %%mm3 ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm0 ;\n" + " pxor 40(%2), %%mm1 ;\n" " movq %%mm3, 24(%1) ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 32(%5), %%mm4 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " pxor 40(%5), %%mm5 ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%5), %%mm6 ;\n" - " pxor 56(%5), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" + " pxor 32(%4), %%mm0 ;\n" + " pxor 40(%3), %%mm1 ;\n" + " movq 48(%1), %%mm2 ;\n" + " movq 56(%1), %%mm3 ;\n" + " pxor 32(%5), %%mm0 ;\n" + " pxor 40(%4), %%mm1 ;\n" + " pxor 48(%2), %%mm2 ;\n" + " pxor 56(%2), %%mm3 ;\n" + " movq %%mm0, 32(%1) ;\n" + " pxor 48(%3), %%mm2 ;\n" + " pxor 56(%3), %%mm3 ;\n" + " pxor 40(%5), %%mm1 ;\n" + " pxor 48(%4), %%mm2 ;\n" + " pxor 56(%4), %%mm3 ;\n" + " movq %%mm1, 40(%1) ;\n" + " pxor 48(%5), %%mm2 ;\n" + " pxor 56(%5), %%mm3 ;\n" + " movq %%mm2, 48(%1) ;\n" + " movq %%mm3, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" @@ -820,7 +802,7 @@ " jnz 1b ;\n" : - : "r" (lines), + : "m" (lines), "r" (bh_ptr[0]->b_data), "r" (bh_ptr[1]->b_data), "r" (bh_ptr[2]->b_data), @@ -830,10 +812,7 @@ break; } - __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); - - if (!(current->flags & PF_USEDFPU)) - stts(); + kernel_release_fpu_mmx(recursive,&mmx_space[0],flags); } #endif /* __i386__ */ #endif /* !__sparc_v9__ */ @@ -1811,11 +1790,12 @@ if (f->speed > fastest->speed) fastest = f; } -#ifdef CONFIG_X86_XMM - if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { +#ifdef CONFIG_X86_CPU_OPTIMIZATIONS + if ( (boot_cpu_data.mmu_cr4_features & X86_CR4_OSFXSR) && + (boot_cpu_data.x86_capability & X86_FEATURE_XMM) ) { fastest = &t_xor_block_pIII_kni; } -#endif +#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */ xor_block = fastest->xor_block; printk( "using fastest function: %s (%d.%03d MB/sec)\n", fastest->name, fastest->speed / 1000, fastest->speed % 1000); @@ -1847,8 +1827,9 @@ xor_speed(&t_xor_block_SPARC,&b1,&b2); #endif -#ifdef CONFIG_X86_XMM - if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { +#ifdef CONFIG_X86_CPU_OPTIMIZATIONS + if ( (boot_cpu_data.mmu_cr4_features & X86_CR4_OSFXSR) && + (boot_cpu_data.x86_capability & X86_FEATURE_XMM) ) { printk(KERN_INFO "raid5: KNI detected, trying cache-avoiding KNI checksum routine\n"); /* we force the use of the KNI xor block because it @@ -1859,7 +1840,7 @@ */ xor_speed(&t_xor_block_pIII_kni,&b1,&b2); } -#endif /* CONFIG_X86_XMM */ +#endif /* CONFIG_X86_CPU_OPTIMIZATIONS */ #ifdef __i386__