From 00acb0d7479770f7828c60e22dbc9186ba7f85fe Mon Sep 17 00:00:00 2001 From: Luboš Luňák Date: Wed, 26 Jan 2022 13:34:08 +0100 Subject: MSVC -arch is independent from the ability to use CPU intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible to write AVX512 intrinsics in code compiled only with -arch:AVX . So do not require -arch for being able to do so, especially since there is no -arch option for only AVX512F without other AVX512 subsets (the option enables also CD, BW, DQ and VL https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64). https://crashreport.libreoffice.org/stats/crash_details/55ef825d-c323-4df9-95e2-76672c674e60 is presumably caused by this, I can see use of registers XMM0-15 in arraysumAVX512.cxx built with -arch:AVX2 but when built with -arch:AVX512 registers XMM16-31 are used too (I'm not sure if that's AVX512DQ or something else, I can't find info on it). Change-Id: I74473333a17e618327d43b920b8929d1b0e733b8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129724 Reviewed-by: Mike Kaganski Reviewed-by: Luboš Luňák Tested-by: Jenkins (cherry picked from commit 807a15bd64c1f2a57371d12e7684541293cd9791) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129765 Reviewed-by: Miklos Vajna --- configure.ac | 31 ++++++++++++++++++------------- include/tools/simdsupport.hxx | 31 +++++++++---------------------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index 8a922cdcee4d..b37328188e38 100644 --- a/configure.ac +++ b/configure.ac @@ -7692,22 +7692,27 @@ if test "$GCC" = "yes" -o "$COM_IS_CLANG" = TRUE; then flag_f16c=-mf16c flag_fma=-mfma else - # https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86 - # MSVC seems to differentiate only between SSE and SSE2, where in fact - # SSE2 seems to be SSE2+. - # Even if -arch:SSE2 is the default, set it explicitly, so that the variable - # is not empty (and can be tested in gbuild). 
- flag_sse2=-arch:SSE2 - flag_ssse3=-arch:SSE2 - flag_sse41=-arch:SSE2 - flag_sse42=-arch:SSE2 + # With MSVC using -arch is in fact not necessary for being able + # to use CPU intrinsics, code using AVX512F intrinsics will compile + # even if compiled with -arch:AVX, the -arch option really only affects + # instructions generated for C/C++ code. + # So use the matching same (or lower) -arch options, but only in order + # to generate the best matching instructions for the C++ code surrounding + # the intrinsics. + # SSE2 is the default for x86/x64, so no need to specify the option. + flag_sse2= + # No specific options for these, use the next lower. + flag_ssse3="$flag_sse2" + flag_sse41="$flag_sse2" + flag_sse42="$flag_sse2" flag_avx=-arch:AVX flag_avx2=-arch:AVX2 flag_avx512=-arch:AVX512 - flag_avx512f=-arch:AVX512 - # These are part of -arch:AVX2 - flag_f16c=-arch:AVX2 - flag_fma=-arch:AVX2 + # Using -arch:AVX512 would enable more than just AVX512F, so use only AVX2. + flag_avx512f=-arch:AVX2 + # No MSVC options for these. + flag_f16c="$flag_sse2" + flag_fma="$flag_sse2" fi AC_MSG_CHECKING([whether $CXX can compile SSE2 intrinsics]) diff --git a/include/tools/simdsupport.hxx b/include/tools/simdsupport.hxx index 738b34e072db..fa8923bb095f 100644 --- a/include/tools/simdsupport.hxx +++ b/include/tools/simdsupport.hxx @@ -34,34 +34,21 @@ #if defined(_MSC_VER) // VISUAL STUDIO COMPILER -// SSE2 is required for X64 -#if (defined(_M_X64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2) +// With MSVC using -arch is in fact not necessary for being able +// to use CPU intrinsics, code using AVX512F intrinsics will compile +// even if compiled with -arch:AVX, the -arch option really only affects +// instructions generated for C/C++ code. +#if defined(_M_X64) || defined(_M_X86) +// As such, if we're building for X86 or X64, support for these is always available +// with MSVC2019+. 
#define LO_SSE2_AVAILABLE -#include -#endif // end SSE2 - -// compiled with /arch:AVX -#if defined(__AVX__) -#ifndef LO_SSE2_AVAILABLE -#define LO_SSE2_AVAILABLE -#include -#endif #define LO_SSSE3_AVAILABLE #define LO_AVX_AVAILABLE -#include -#endif // end defined(__AVX__) - -// compiled with /arch:AVX2 -#if defined(__AVX2__) #define LO_AVX2_AVAILABLE -#include -#endif // defined(__AVX2__) - -// compiled with /arch:AVX512F -#if defined(__AVX512F__) #define LO_AVX512F_AVAILABLE +#include #include -#endif // defined(__AVX512F__) +#endif #else // compiler Clang and GCC -- cgit