Skip to content

Spack's 'py-numpy' is not multithreaded

In tutorial 03-benchmarks I obtain numpy via spack (version 2.2.4, both hashes) and want to show how the runtime correlates (and eventually flatlines) with an increase in --cpu-per-task. While doing that, I noticed that my numpy matrix-matrix multiplication only ever uses a single core.

The output of np.__config__.show() contains a possibly telling USE_OPENMP=0 in the "openblas configuration" entries, which might be the cause of this behaviour.

Full output of np.__config__.show():
{
  "Compilers": {
    "c": {
      "name": "gcc",
      "linker": "ld.bfd",
      "version": "14.2.0",
      "commands": "/local/spack/20250423/spack/lib/spack/env/gcc/gcc"
    },
    "cython": {
      "name": "cython",
      "linker": "cython",
      "version": "3.0.11",
      "commands": "cython"
    },
    "c++": {
      "name": "gcc",
      "linker": "ld.bfd",
      "version": "14.2.0",
      "commands": "/local/spack/20250423/spack/lib/spack/env/gcc/g++"
    }
  },
  "Machine Information": {
    "host": {
      "cpu": "x86_64",
      "family": "x86_64",
      "endian": "little",
      "system": "linux"
    },
    "build": {
      "cpu": "x86_64",
      "family": "x86_64",
      "endian": "little",
      "system": "linux"
    }
  },
  "Build Dependencies": {
    "blas": {
      "name": "openblas",
      "found": true,
      "version": "0.3.29",
      "detection method": "pkgconfig",
      "include directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/include",
      "lib directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib",
      "openblas configuration": "USE_64BITINT= DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP=0 GENERIC MAX_THREADS=48",
      "pc file directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib/pkgconfig"
    },
    "lapack": {
      "name": "openblas",
      "found": true,
      "version": "0.3.29",
      "detection method": "pkgconfig",
      "include directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/include",
      "lib directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib",
      "openblas configuration": "USE_64BITINT= DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP=0 GENERIC MAX_THREADS=48",
      "pc file directory": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/openblas-0.3.29-jf6xgmkslkir2uq6v3jvfwkmh3g5r3wd/lib/pkgconfig"
    }
  },
  "Python Information": {
    "path": "/local/spack/20250423/spack/opt/spack/linux-rocky9-x86_64_v3/gcc-14.2.0/python-venv-1.0-zawayqkgwz4bgqnrw4mxgc2nd3zkm6st/bin/python3",
    "version": "3.13"
  },
  "SIMD Extensions": {
    "baseline": [
      "SSE",
      "SSE2",
      "SSE3"
    ],
    "found": [
      "SSSE3",
      "SSE41",
      "POPCNT",
      "SSE42",
      "AVX",
      "F16C",
      "FMA3",
      "AVX2"
    ],
    "not found": [
      "AVX512F",
      "AVX512CD",
      "AVX512_KNL",
      "AVX512_KNM",
      "AVX512_SKX",
      "AVX512_CLX",
      "AVX512_CNL",
      "AVX512_ICL"
    ]
  }
}

Steps to Reproduce

# the problem is reproducible ("works") with either of the two builds
spack load py-numpy@2.2.4/sqoiw5y
spack load py-numpy@2.2.4/jolhhjs
python 03-benchmarks/workload.py
Edited by Adrian Köring