>From 23ad3a438ef7bcd34e2354f6cbdede634f0188d4 Mon Sep 17 00:00:00 2001 From: Dave Love Date: Fri, 22 Dec 2017 12:48:29 +0000 Subject: [PATCH 1/2] gnu: maths: Fix cache size detected by openblas on some x86_64. * gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch, gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch: New files. * gnu/packages/maths.scm (openblas)[source]: Use them. * gnu/local.mk: Register them. --- gnu/local.mk | 2 + gnu/packages/maths.scm | 6 +- ...mplementation-of-cpuid_count-for-the-CPUI.patch | 36 ++++ ...-with-subleafs-to-query-L1-cache-size-on-.patch | 190 +++++++++++++++++++++ 4 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch create mode 100644 gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch diff --git a/gnu/local.mk b/gnu/local.mk index ac1c8ac2a..e69ccce34 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -930,6 +930,8 @@ dist_patch_DATA = \ %D%/packages/patches/omake-fix-non-determinism.patch \ %D%/packages/patches/ola-readdir-r.patch \ %D%/packages/patches/openscenegraph-ffmpeg3.patch \ + %D%/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch \ + %D%/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch \ %D%/packages/patches/openexr-missing-samples.patch \ %D%/packages/patches/openfoam-4.1-cleanup.patch \ %D%/packages/patches/openldap-CVE-2017-9287.patch \ diff --git a/gnu/packages/maths.scm b/gnu/packages/maths.scm index bb7dbee4b..8a48ec2fc 100644 --- a/gnu/packages/maths.scm +++ b/gnu/packages/maths.scm @@ -2629,7 +2629,11 @@ parts of it.") (file-name (string-append name "-" version ".tar.gz")) (sha256 (base32 - "1bd03c5xni0bla0wg1wba841b36b0sg13sjja955kn5xzvy4i61a")))) + "1bd03c5xni0bla0wg1wba841b36b0sg13sjja955kn5xzvy4i61a")) + (patches + (search-patches + "openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch" + "openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch")))) (build-system gnu-build-system) (arguments `(#:tests? #f ;no "check" target diff --git a/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch b/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch new file mode 100644 index 000000000..d9cff2cf0 --- /dev/null +++ b/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch @@ -0,0 +1,36 @@ +From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 12 Jul 2017 21:56:23 +0200 +Subject: [PATCH 2/2] Add dummy implementation of cpuid_count for the CPUIDEMU + case + +--- + cpuid_x86.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/cpuid_x86.c b/cpuid_x86.c +index 73b4df6b..103128a3 100644 +--- a/cpuid_x86.c ++++ b/cpuid_x86.c +@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * + *edx = idlist[current].d; + } + ++void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { ++ return cpuid (op, eax, ebx, ecx, edx); ++} ++ + #endif + + #endif // _MSC_VER +@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + } + } + } +- + cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); + if (cpuid_level >= 0x80000006) { + if(L2.size<=0){ +-- +2.11.0 + diff --git a/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch b/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch new file mode 100644 index 000000000..a58c08e23 --- /dev/null +++ b/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch @@ -0,0 +1,190 @@ +From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 12 Jul 2017 20:43:09 +0200 +Subject: [PATCH 1/2] Use cpuid 4 with subleafs to query L1 cache size on Intel + processors + +--- + cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 102 insertions(+), 15 deletions(-) + +diff --git a/cpuid_x86.c b/cpuid_x86.c +index ab2ecdca..73b4df6b 100644 +--- a/cpuid_x86.c ++++ b/cpuid_x86.c +@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) + *edx = cpuInfo[3]; + } + ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx) ++{ ++ int cpuInfo[4] = {-1}; ++ __cpuidex(cpuInfo, op, count); ++ *eax = cpuInfo[0]; ++ *ebx = cpuInfo[1]; ++ *ecx = cpuInfo[2]; ++ *edx = cpuInfo[3]; ++} ++ + #else + + #ifndef CPUIDEMU + + #if defined(__APPLE__) && defined(__i386__) + void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx); + #else + static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + #if defined(__i386__) && defined(__PIC__) +@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); + #endif + } ++ ++static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){ ++#if defined(__i386__) && defined(__PIC__) ++ __asm__ __volatile__ ++ ("mov %%ebx, %%edi;" ++ "cpuid;" ++ "xchgl %%ebx, %%edi;" ++ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#else ++ __asm__ __volatile__ ++ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#endif ++} + #endif + + #else +@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + cpuid(0, &cpuid_level, &ebx, &ecx, &edx); + + if (cpuid_level > 1) { +- ++ int numcalls =0 ; + cpuid(2, &eax, &ebx, &ecx, &edx); +- ++ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries + info[ 0] = BITMASK(eax, 8, 0xff); + info[ 1] = BITMASK(eax, 16, 0xff); + info[ 2] = BITMASK(eax, 24, 0xff); +@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + info[14] = BITMASK(edx, 24, 0xff); + + for (i = 0; i < 15; i++){ +- + switch (info[i]){ + + /* This table is from http://www.sandpile.org/ia32/cpuid.htm */ +@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LD1.linesize = 64; + break; + case 0x63 : +- DTB.size = 2048; +- DTB.associative = 4; +- DTB.linesize = 32; +- LDTB.size = 4096; +- LDTB.associative= 4; +- LDTB.linesize = 32; ++ DTB.size = 2048; ++ DTB.associative = 4; ++ DTB.linesize = 32; ++ LDTB.size = 4096; ++ LDTB.associative= 4; ++ LDTB.linesize = 32; ++ break; + case 0x66 : + LD1.size = 8; + LD1.associative = 4; +@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LC1.associative = 8; + break; + case 0x76 : +- ITB.size = 2048; +- ITB.associative = 0; +- ITB.linesize = 8; +- LITB.size = 4096; +- LITB.associative= 0; +- LITB.linesize = 8; ++ ITB.size = 2048; ++ ITB.associative = 0; ++ ITB.linesize = 8; ++ LITB.size = 4096; ++ LITB.associative= 0; ++ LITB.linesize = 8; ++ break; + case 0x77 : + LC1.size = 16; + LC1.associative = 4; +@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + } + + if (get_vendor() == VENDOR_INTEL) { ++ if(LD1.size<=0 || LC1.size<=0){ ++ //If we didn't detect L1 correctly before, ++ int count; ++ for (count=0;count <4;count++) { ++ cpuid_count(4, count, &eax, &ebx, &ecx, &edx); ++ switch (eax &0x1f) { ++ case 0: ++ continue; ++ case 1: ++ case 3: ++ { ++ switch ((eax >>5) &0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 data cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LD1.size = (assoc*part*lines*sets)/1024; ++ LD1.associative = assoc; ++ LD1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ } ++ case 2: ++ { ++ switch ((eax >>5) &0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 instruction cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LC1.size = (assoc*part*lines*sets)/1024; ++ LC1.associative = assoc; ++ LC1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ ++ } ++ default: ++ break; ++ } ++ } ++ } ++ + cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); + if (cpuid_level >= 0x80000006) { + if(L2.size<=0){ +-- +2.11.0 + -- 2.11.0