Skip to content

Commit ddd7eb8

Browse files
committed
cmake: add USE_ARCH_INTRINSICS and USE_COMPILER_BUILTINS CMake options
This introduces the USE_ARCH_INTRINSICS CMake option. It is enabled by default. Disabling it is meant to disable custom asm code and usage of intrinsic functions for the target platform in the Dæmon code base; it may also be used by games built with the Dæmon common code base. It is not meant to disable asm or intrinsics usage in third-party libraries. It is not meant to prevent the compiler from using such intrinsics in its optimization passes. It is not meant to disable the compiler flags we set to tell the compiler to try to use such intrinsics in its optimization passes; for this, one should disable USE_CPU_RECOMMENDED_FEATURES instead. For obvious reasons, the asm code in the BREAKPOINT() implementation is not meant to be disabled by USE_ARCH_INTRINSICS. The macro syntax is: DAEMON_USE_ARCH_INTRINSICS_(architecture)[_extension] Examples: - DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code. - DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code. - DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code. If a platform inherits a feature from a parent platform, the parent platform name is used. For example on amd64, the definition enabling SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both i686 with SSE and amd64 platforms, and both DAEMON_USE_ARCH_INTRINSICS_amd64 and DAEMON_USE_ARCH_INTRINSICS_i686 are available. This also introduces the USE_COMPILER_BUILTINS CMake option. It is enabled by default. Disabling it is meant to test the code paths used for unknown compilers.
1 parent 6bcac3e commit ddd7eb8

6 files changed

Lines changed: 79 additions & 19 deletions

File tree

cmake/DaemonArchitecture.cmake

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,27 @@ endif()
9090

9191
# Quotes cannot be part of the define as support for them is not reliable.
9292
add_definitions(-DNACL_ARCH_STRING=${NACL_ARCH})
93+
94+
option(USE_ARCH_INTRINSICS "Enable custom code using intrinsics functions or asm declarations" ON)
95+
96+
# set_arch_intrinsics(<name>)
# Prints a status message saying whether intrinsics for architecture <name>
# are enabled and, when USE_ARCH_INTRINSICS is true, adds the compile
# definition DAEMON_USE_ARCH_INTRINSICS_<name>=1. Nothing is defined otherwise.
macro(set_arch_intrinsics name)
97+
if (USE_ARCH_INTRINSICS)
98+
message(STATUS "Enabling ${name} architecture intrinsics")
99+
add_definitions(-DDAEMON_USE_ARCH_INTRINSICS_${name}=1)
100+
else()
101+
message(STATUS "Disabling ${name} architecture intrinsics")
102+
endif()
103+
endmacro()
104+
105+
# Architecture-independent switch: defined whenever USE_ARCH_INTRINSICS is on.
if (USE_ARCH_INTRINSICS)
106+
add_definitions(-DDAEMON_USE_ARCH_INTRINSICS=1)
107+
endif()
108+
109+
set_arch_intrinsics(${ARCH})
110+
111+
# Parent architecture table, stored as <arch>_PARENT variables.
set(amd64_PARENT "i686")
112+
set(arm64_PARENT "armhf")
113+
114+
# If the current ARCH has a parent entry above, also run set_arch_intrinsics
# for the parent architecture name.
if (${ARCH}_PARENT)
115+
set_arch_intrinsics(${${ARCH}_PARENT})
116+
endif()

cmake/DaemonFlags.cmake

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ include(CheckCXXCompilerFlag)
2929

3030
add_definitions(-DDAEMON_BUILD_${CMAKE_BUILD_TYPE})
3131

32+
# USE_COMPILER_BUILTINS (default ON): when enabled, adds the compile definition
# DAEMON_USE_COMPILER_BUILTINS=1. A status message reports the choice either way.
option(USE_COMPILER_BUILTINS "Enable usage of compiler builtins" ON)
33+
34+
if (USE_COMPILER_BUILTINS)
35+
add_definitions(-DDAEMON_USE_COMPILER_BUILTINS=1)
36+
message(STATUS "Enabling compiler builtins")
37+
else()
38+
message(STATUS "Disabling compiler builtins")
39+
endif()
40+
3241
# Set flag without checking, optional argument specifies build type
3342
macro(set_c_flag FLAG)
3443
if (${ARGC} GREATER 1)

src/common/Compiler.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ int CountTrailingZeroes(unsigned int x);
4141
int CountTrailingZeroes(unsigned long x);
4242
int CountTrailingZeroes(unsigned long long x);
4343

44-
#if defined( __GNUC__ )
44+
// Compiler-specific CountTrailingZeroes implementations. They are gated on
// DAEMON_USE_COMPILER_BUILTINS, the definition CMake sets for USE_COMPILER_BUILTINS.
#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined( __GNUC__ )
4545
inline int CountTrailingZeroes(unsigned int x)
4646
{ return __builtin_ctz(x); }
4747
inline int CountTrailingZeroes(unsigned long x)
4848
{ return __builtin_ctzl(x); }
4949
inline int CountTrailingZeroes(unsigned long long x)
5050
{ return __builtin_ctzll(x); }
51-
#elif defined( _MSC_VER )
51+
#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined( _MSC_VER )
5252
inline int CountTrailingZeroes(unsigned int x)
5353
{ unsigned long ans; _BitScanForward(&ans, x); return ans; }
5454
inline int CountTrailingZeroes(unsigned long x)
@@ -75,7 +75,7 @@ int CountTrailingZeroes(unsigned long long x);
7575
#endif
7676

7777
// GCC and Clang
78-
#if defined( __GNUC__ )
78+
#if defined(DAEMON_USE_COMPILER_BUILTINS) && defined( __GNUC__ )
7979

8080
// Emit a nice warning when a function is used
8181
#define DEPRECATED __attribute__((__deprecated__))
@@ -119,6 +119,7 @@ int CountTrailingZeroes(unsigned long long x);
119119

120120
// Raise an exception and break in the debugger
121121
#if defined(DAEMON_ARCH_i686) || defined(DAEMON_ARCH_amd64)
122+
// Always run this asm code even if DAEMON_USE_ARCH_INTRINSICS is not defined.
122123
#define BREAKPOINT() __asm__ __volatile__("int $3\n\t")
123124
#elif defined(DAEMON_ARCH_nacl)
124125
// TODO: find how to implement breakpoint on NaCl
@@ -177,7 +178,7 @@ See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0627r0.pdf */
177178
#endif
178179

179180
// Microsoft Visual C++
180-
#elif defined( _MSC_VER )
181+
#elif defined(DAEMON_USE_COMPILER_BUILTINS) && defined( _MSC_VER )
181182

182183
// Disable some warnings
183184
#pragma warning(disable : 4100) // unreferenced formal parameter

src/common/Platform.h

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,39 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6363
#define __x86_64__ 1
6464
#endif
6565

66-
// SSE support
67-
#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1
68-
#include <xmmintrin.h>
69-
#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2
70-
#include <emmintrin.h>
71-
#define idx86_sse 2
72-
#else
73-
#define idx86_sse 1
74-
#endif
66+
/* The definition name syntax is: DAEMON_USE_ARCH_INTRINSICS_<architecture>[_extension]
67+
68+
Examples:
69+
70+
- DAEMON_USE_ARCH_INTRINSICS_i686: i686 specific code, including asm code.
71+
- DAEMON_USE_ARCH_INTRINSICS_i686_sse: i686 SSE specific code.
72+
- DAEMON_USE_ARCH_INTRINSICS_i686_sse2: i686 SSE2 specific code.
73+
74+
If an architecture inherits a feature from a parent architecture, the parent
75+
architecture name is used. For example on amd64, the definition enabling
76+
SSE code is DAEMON_USE_ARCH_INTRINSICS_i686_sse, enabling SSE code on both
77+
i686 with SSE and amd64.
78+
79+
The definitions for the architecture itself are automatically set by CMake. */
80+
81+
#if defined(DAEMON_USE_ARCH_INTRINSICS)
82+
// Set architecture extension definitions. __x86_64__ is tested too, as before this change: MSVC x64 sets neither __SSE__/__SSE2__ nor _M_IX86_FP.
83+
#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2
84+
#define DAEMON_USE_ARCH_INTRINSICS_i686_sse2
85+
#endif
86+
87+
#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1
88+
#define DAEMON_USE_ARCH_INTRINSICS_i686_sse
89+
#endif
90+
91+
// Include intrinsics-specific headers.
92+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
93+
#include <xmmintrin.h>
94+
#endif
95+
96+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse2)
97+
#include <emmintrin.h>
98+
#endif
7599
#endif
76100

77101
// VM Prefixes

src/engine/qcommon/q_math.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ void SetPlaneSignbits( cplane_t *out )
740740

741741
int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p )
742742
{
743-
#if idx86_sse
743+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
744744
auto mins = sseLoadVec3Unsafe( emins );
745745
auto maxs = sseLoadVec3Unsafe( emaxs );
746746
auto normal = sseLoadVec3Unsafe( p->normal );
@@ -1799,7 +1799,7 @@ void MatrixSetupShear( matrix_t m, vec_t x, vec_t y )
17991799

18001800
void MatrixMultiply( const matrix_t a, const matrix_t b, matrix_t out )
18011801
{
1802-
#if idx86_sse
1802+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
18031803
//#error MatrixMultiply
18041804
int i;
18051805
__m128 _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
@@ -3288,7 +3288,8 @@ void QuatTransformVectorInverse( const quat_t q, const vec3_t in, vec3_t out )
32883288
VectorAdd( out, tmp2, out );
32893289
}
32903290

3291-
#if !idx86_sse
3291+
// The SSE variants are inline functions in q_shared.h file.
3292+
#if !defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
32923293
// create an identity transform
32933294
void TransInit( transform_t *t )
32943295
{

src/engine/qcommon/q_shared.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ void Com_Free_Aligned( void *ptr );
248248
// floats (quat: 4, scale: 1, translation: 3), which is very
249249
// convenient for SSE and GLSL, which operate on 4-dimensional
250250
// float vectors.
251-
#if idx86_sse
251+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
252252
// Here we have a union of scalar struct and sse struct, transform_u and the
253253
// scalar struct must match transform_t so we have to use anonymous structs.
254254
// We disable compiler warnings when using -Wpedantic for this specific case.
@@ -375,7 +375,7 @@ extern const quat_t quatIdentity;
375375
float y;
376376

377377
// compute approximate inverse square root
378-
#if defined( idx86_sse )
378+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
379379
// SSE rsqrt relative error bound: 3.7 * 10^-4
380380
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
381381
#elif idppc
@@ -774,7 +774,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
774774
//=============================================
775775
// combining Transformations
776776

777-
#if idx86_sse
777+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
778778
/* swizzles for _mm_shuffle_ps instruction */
779779
#define SWZ_XXXX 0x00
780780
#define SWZ_YXXX 0x01
@@ -1293,6 +1293,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
12931293
t->sseRot = sseQuatNormalize( t->sseRot );
12941294
}
12951295
#else
1296+
// The non-SSE variants are in q_math.cpp file.
12961297
void TransInit( transform_t *t );
12971298
void TransCopy( const transform_t *in, transform_t *out );
12981299

0 commit comments

Comments
 (0)