diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index eb2e3ab53..1cb149aef 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -53,16 +53,16 @@ jobs:
         run: |
           mkdir build
           cd build
-          # Disable -march=native and pin CPU instruction set to AVX2+FMA+F16C so
-          # the released x86_64 binary runs on hosts without AVX-512.
-          # Without GGML_NATIVE=OFF, ggml's CMake auto-enables every extension
-          # the build runner's CPU has (including AVX-512 on Azure Xeon
-          # Platinum 8370C runners), which then SIGILLs on AVX-512-less hosts.
+          # Build a fat package: one libstable-diffusion.so plus a libggml-cpu-*.so
+          # per CPU variant (sandybridge, haswell, skylakex, icelake, alderlake,
+          # x64). At runtime ggml dlopens whichever variant is highest-priority on
+          # the host CPU, so an AVX-512 host gets AVX-512 perf and an AVX-512-less
+          # host falls back to haswell — same zip, no -march=native runner
+          # lottery, no SIGILL.
           cmake .. \
             -DGGML_NATIVE=OFF \
-            -DGGML_AVX2=ON \
-            -DGGML_FMA=ON \
-            -DGGML_F16C=ON \
+            -DGGML_BACKEND_DL=ON \
+            -DGGML_CPU_ALL_VARIANTS=ON \
             -DSD_BUILD_SHARED_LIBS=ON
           cmake --build . --config Release
 
@@ -513,16 +513,16 @@ jobs:
         run: |
           mkdir build
           cd build
-          # Same portability concern as ubuntu-latest-cmake: pin the host CPU
-          # instruction set so the binary runs on AVX-512-less ROCm hosts too.
+          # Fat package: same approach as ubuntu-latest-cmake. The HIPBLAS build
+          # still uses ggml's CPU ops for parts of the pipeline (CLIP encoding,
+          # etc.), so it benefits from per-CPU variants the same way.
           cmake .. -G Ninja \
             -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
             -DCMAKE_HIP_FLAGS="-mllvm --amdgpu-unroll-threshold-local=600" \
             -DCMAKE_BUILD_TYPE=Release \
             -DGGML_NATIVE=OFF \
-            -DGGML_AVX2=ON \
-            -DGGML_FMA=ON \
-            -DGGML_F16C=ON \
+            -DGGML_BACKEND_DL=ON \
+            -DGGML_CPU_ALL_VARIANTS=ON \
             -DSD_HIPBLAS=ON \
             -DHIP_PLATFORM=amd \
             -DGPU_TARGETS="${{ matrix.gpu_targets }}" \