Spack's 'py-numpy' is not multithreaded
In tutorial 03-benchmarks I obtain numpy via spack (version 2.2.4, both hashes) and want to show how the runtime changes (and eventually flatlines) as --cpus-per-task increases. While doing that, I noticed that my numpy matrix-matrix multiplication only ever uses a single core.
Inspecting np.__config__.show() reveals USE_OPENMP=0 in the OpenBLAS configuration string. Could this be the cause of the behaviour?
Full output of np.__config__.show():
{
"Compilers": {
"c": {
"name": "gcc",
"linker": "ld.bfd",
"version": "14.2.0",
"commands": "/local/spack/20250423/spack/lib/spack/env/gcc/gcc"
},
"cython": {
"name": "cython",
"linker": "cython",
"version": "3.0.11",
"commands": "cython"
},
"c++": {
"name": "gcc",
"linker": "ld.bfd",
"version": "14.2.0",
"commands": "/local/spack/20250423/spack/lib/spack/env/gcc/g++"
}
},
"Machine Information": {
"host": {
"cpu": "x86_64",
"family": "x86_64",
"endian": "little",
"system": "linux"
},
"build": {
"cpu": "x86_64",
"family": "x86_64",
"endian": "little",
"system": "linux"
}
},
"Build Dependencies": {
"blas": {
"name": "openblas",
"found": true,
"version": "0.3.29",
"detection method": "pkgconfig",
"include directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/include",
"lib directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib",
"openblas configuration": "USE_64BITINT= DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP=0 GENERIC MAX_THREADS=48",
"pc file directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib/pkgconfig"
},
"lapack": {
"name": "openblas",
"found": true,
"version": "0.3.29",
"detection method": "pkgconfig",
"include directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/include",
"lib directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib",
"openblas configuration": "USE_64BITINT= DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP=0 GENERIC MAX_THREADS=48",
"pc file directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib/pkgconfig"
}
},
"Python Information": {
"path": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/python-venv-1.0-zawayqkgwz4bgqnrw4mxgc2nd3zkm6st/bin/python3",
"version": "3.13"
},
"SIMD Extensions": {
"baseline": [
"SSE",
"SSE2",
"SSE3"
],
"found": [
"SSSE3",
"SSE41",
"POPCNT",
"SSE42",
"AVX",
"F16C",
"FMA3",
"AVX2"
],
"not found": [
"AVX512F",
"AVX512CD",
"AVX512_KNL",
"AVX512_KNM",
"AVX512_SKX",
"AVX512_CLX",
"AVX512_CNL",
"AVX512_ICL"
]
}
}
Steps to Reproduce
# "works" with both
spack load py-numpy@2.2.4/sqoiw5y
spack load py-numpy@2.2.4/jolhhjs
python 03-benchmarks/workload.py
Edited by Adrian Köring