/****************************************************************************
 *
 * ftcalc.h
 *
 *   Arithmetic computations (specification).
 *
 * Copyright (C) 1996-2024 by
 * David Turner, Robert Wilhelm, and Werner Lemberg.
 *
 * This file is part of the FreeType project, and may only be used,
 * modified, and distributed under the terms of the FreeType project
 * license, LICENSE.TXT.  By continuing to use, modify, or distribute
 * this file you indicate that you have read the license and
 * understand and accept it fully.
 *
 */


#ifndef FTCALC_H_
#define FTCALC_H_


#include <freetype/freetype.h>

#include "compiler-macros.h"

FT_BEGIN_HEADER


  /**************************************************************************
   *
   * FT_MulDiv() and FT_MulFix() are declared in freetype.h.
   *
   */

#ifndef FT_CONFIG_OPTION_NO_ASSEMBLER
  /* Provide assembler fragments for performance-critical functions. */
  /* These must be defined `static __inline__' with GCC.             */

#if defined( __CC_ARM ) || defined( __ARMCC__ )  /* RVCT */

#define FT_MULFIX_ASSEMBLER  FT_MulFix_arm

  /* documentation is in freetype.h */

  static __inline FT_Int32
  FT_MulFix_arm( FT_Int32  a,
                 FT_Int32  b )
  {
    FT_Int32  t, t2;


    __asm
    {
      smull t2, t,  b,  a           /* (lo=t2,hi=t) = a*b */
      mov   a,  t,  asr #31         /* a   = (hi >> 31)   */
      add   a,  a,  #0x8000         /* a  += 0x8000       */
      adds  t2, t2, a               /* t2 += a            */
      adc   t,  t,  #0              /* t  += carry        */
      mov   a,  t2, lsr #16         /* a   = t2 >> 16     */
      orr   a,  a,  t, lsl #16      /* a  |= t << 16      */
    }
    return a;
  }

#endif /* __CC_ARM || __ARMCC__ */


#ifdef __GNUC__

#if defined( __arm__ )                                 && \
    ( !defined( __thumb__ ) || defined( __thumb2__ ) ) && \
    !( defined( __CC_ARM ) || defined( __ARMCC__ ) )

#define FT_MULFIX_ASSEMBLER  FT_MulFix_arm

  /* documentation is in freetype.h */

  static __inline__ FT_Int32
  FT_MulFix_arm( FT_Int32  a,
                 FT_Int32  b )
  {
    FT_Int32  t, t2;


    __asm__ __volatile__ (
      "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
      "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31)    */
#if defined( __clang__ ) && defined( __thumb2__ )
      "add.w  %0, %0, #0x8000\n\t"      /* %0 += 0x8000        */
#else
      "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000        */
#endif
      "adds   %1, %1, %0\n\t"           /* %1 += %0            */
      "adc    %2, %2, #0\n\t"           /* %2 += carry         */
      "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16      */
      "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16      */
      : "=r"(a), "=&r"(t2), "=&r"(t)
      : "r"(a), "r"(b)
      : "cc" );
    return a;
  }

#endif /* __arm__                      && */
       /* ( __thumb2__ || !__thumb__ ) && */
       /* !( __CC_ARM || __ARMCC__ )      */


#if defined( __i386__ )

#define FT_MULFIX_ASSEMBLER  FT_MulFix_i386

  /* documentation is in freetype.h */

  static __inline__ FT_Int32
  FT_MulFix_i386( FT_Int32  a,
                  FT_Int32  b )
  {
    FT_Int32  result;


    __asm__ __volatile__ (
      "imul  %%edx\n"
      "movl  %%edx, %%ecx\n"
      "sarl  $31, %%ecx\n"
      "addl  $0x8000, %%ecx\n"
      "addl  %%ecx, %%eax\n"
      "adcl  $0, %%edx\n"
      "shrl  $16, %%eax\n"
      "shll  $16, %%edx\n"
      "addl  %%edx, %%eax\n"
      : "=a"(result), "=d"(b)
      : "a"(a), "d"(b)
      : "%ecx", "cc" );
    return result;
  }

#endif /* i386 */

#endif /* __GNUC__ */


#ifdef _MSC_VER /* Visual C++ */

#ifdef _M_IX86

#define FT_MULFIX_ASSEMBLER  FT_MulFix_i386

  /* documentation is in freetype.h */

  static __inline FT_Int32
  FT_MulFix_i386( FT_Int32  a,
                  FT_Int32  b )
  {
    FT_Int32  result;

    __asm
    {
      mov eax, a
      mov edx, b
      imul edx
      mov ecx, edx
      sar ecx, 31
      add ecx, 8000h
      add eax, ecx
      adc edx, 0
      shr eax, 16
      shl edx, 16
      add eax, edx
      mov result, eax
    }
    return result;
  }

#endif /* _M_IX86 */

#endif /* _MSC_VER */
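  /*
   * Every assembler fragment in this file implements the same operation:
   * a signed 16.16 fixed-point multiplication whose 64-bit product is
   * rounded to the nearest representable value.  As a hedged illustration
   * only (`mulfix_portable` is not part of FreeType), a portable
   * equivalent might look like this:
   *
   * ```
   *   static FT_Int32
   *   mulfix_portable( FT_Int32  a,
   *                    FT_Int32  b )
   *   {
   *     long long  ab = (long long)a * b;
   *
   *
   *     return (FT_Int32)( ( ab + 0x8000 + ( ab >> 63 ) ) >> 16 );
   *   }
   * ```
   *
   * The term `ab >> 63` is 0 for non-negative products and -1 otherwise,
   * which keeps the `0x8000` rounding bias symmetric around zero; for
   * example, `mulfix_portable( 0x10000, 0x28000 )` (1.0 times 2.5) yields
   * `0x28000` (2.5).
   */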
#if defined( __GNUC__ ) && defined( __x86_64__ )

#define FT_MULFIX_ASSEMBLER  FT_MulFix_x86_64

  static __inline__ FT_Int32
  FT_MulFix_x86_64( FT_Int32  a,
                    FT_Int32  b )
  {
    /* Temporarily disable the warning that C90 doesn't support */
    /* `long long'.                                             */
#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wlong-long"
#endif

#if 1
    /* Technically not an assembly fragment, but GCC does a really good */
    /* job at inlining it and generating good machine code for it.      */
    long long  ret, tmp;


    ret  = (long long)a * b;
    tmp  = ret >> 63;
    ret += 0x8000 + tmp;

    return (FT_Int32)( ret >> 16 );
#else
    /* For some reason, GCC 4.6 on Ubuntu 12.04 generates invalid machine  */
    /* code from the lines below.  The main issue is that `wide_a' is not  */
    /* properly initialized by sign-extending `a'.  Instead, the generated */
    /* machine code assumes that the register that contains `a' on input   */
    /* can be used directly as a 64-bit value, which is wrong most of the  */
    /* time.                                                               */
    long long  wide_a = (long long)a;
    long long  wide_b = (long long)b;
    long long  result;


    __asm__ __volatile__ (
      "imul %2, %1\n"
      "mov %1, %0\n"
      "sar $63, %0\n"
      "lea 0x8000(%1, %0), %0\n"
      "sar $16, %0\n"
      : "=&r"(result), "=&r"(wide_a)
      : "r"(wide_b)
      : "cc" );

    return (FT_Int32)result;
#endif

#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 )
#pragma GCC diagnostic pop
#endif
  }

#endif /* __GNUC__ && __x86_64__ */

#endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */


#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
#ifdef FT_MULFIX_ASSEMBLER
#define FT_MulFix( a, b )  FT_MULFIX_ASSEMBLER( (FT_Int32)(a), (FT_Int32)(b) )
#endif
#endif


  /**************************************************************************
   *
   * @function:
   *   FT_MulDiv_No_Round
   *
   * @description:
   *   A very simple function used to perform the computation '(a*b)/c'
   *   (without rounding) with maximum accuracy (it uses a 64-bit
   *   intermediate integer whenever necessary).
   *
   *   This function isn't necessarily as fast as some processor-specific
   *   operations, but is at least completely portable.
   *
   * @input:
   *   a ::
   *     The first multiplier.
   *
   *   b ::
   *     The second multiplier.
   *
   *   c ::
   *     The divisor.
   *
   * @return:
   *   The result of '(a*b)/c'.  This function never traps when trying to
   *   divide by zero; it simply returns 'MaxInt' or 'MinInt' depending on
   *   the signs of 'a' and 'b'.
   */
  FT_BASE( FT_Long )
  FT_MulDiv_No_Round( FT_Long  a,
                      FT_Long  b,
                      FT_Long  c );


  /**************************************************************************
   *
   * @function:
   *   FT_MulAddFix
   *
   * @description:
   *   Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]`
   *   is usually a 16.16 scalar.
   *
   * @input:
   *   s ::
   *     The array of scalars.
   *
   *   f ::
   *     The array of factors.
   *
   *   count ::
   *     The number of entries in the array.
   *
   * @return:
   *   The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`.
   *
   * @note:
   *   This function is currently used for the scaled delta computation of
   *   variation stores.  It internally uses 64-bit data types when
   *   available, otherwise it emulates 64-bit math by using 32-bit
   *   operations, which produce a correct result but most likely at slower
   *   performance compared to the implementation based on `int64_t`.
   *
   */
  FT_BASE( FT_Int32 )
  FT_MulAddFix( FT_Fixed*  s,
                FT_Int32*  f,
                FT_UInt    count );


  /*
   * A variant of FT_Matrix_Multiply which scales its result afterwards.
   * The idea is that both `a' and `b' are scaled by factors of 10 so that
   * the values are as precise as possible to get a correct result during
   * the 64bit multiplication.  Let `sa' and `sb' be the scaling factors of
   * `a' and `b', respectively, then the scaling factor of the result is
   * `sa*sb'.
   */
  FT_BASE( void )
  FT_Matrix_Multiply_Scaled( const FT_Matrix*  a,
                             FT_Matrix        *b,
                             FT_Long           scaling );
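  /*
   * A hedged usage sketch for `FT_MulAddFix` above (the values are
   * illustrative only, not taken from FreeType code): with the 16.16
   * scalars 1.0 and 2.0 and the integer factors 3 and 4, the call below
   * computes `( 0x10000 * 3 + 0x20000 * 4 ) / 0x10000 = 11`.
   *
   * ```
   *   FT_Fixed  s[2] = { 0x10000, 0x20000 };
   *   FT_Int32  f[2] = { 3, 4 };
   *   FT_Int32  sum;
   *
   *
   *   sum = FT_MulAddFix( s, f, 2 );
   * ```
   */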
  /*
   * Check a matrix.  If the transformation would lead to extreme shear or
   * extreme scaling, for example, return 0.  If everything is OK, return 1.
   *
   * Based on geometric considerations we use the following inequality to
   * identify a degenerate matrix.
   *
   *   32 * abs(xx*yy - xy*yx) < xx^2 + xy^2 + yx^2 + yy^2
   *
   * Value 32 is heuristic.
   */
  FT_BASE( FT_Bool )
  FT_Matrix_Check( const FT_Matrix*  matrix );


  /*
   * A variant of FT_Vector_Transform.  See comments for
   * FT_Matrix_Multiply_Scaled.
   */
  FT_BASE( void )
  FT_Vector_Transform_Scaled( FT_Vector*        vector,
                              const FT_Matrix*  matrix,
                              FT_Long           scaling );


  /*
   * This function normalizes a vector and returns its original length.
   * The normalized vector is a 16.16 fixed-point unit vector with length
   * close to 0x10000.  The accuracy of the returned length is limited to
   * 16 bits also.  The function utilizes quick inverse square root
   * approximation without divisions and square roots, relying on Newton's
   * iterations instead.
   */
  FT_BASE( FT_UInt32 )
  FT_Vector_NormLen( FT_Vector*  vector );


  /*
   * Return -1, 0, or +1, depending on the orientation of a given corner.
   * We use the Cartesian coordinate system, with positive vertical values
   * going upwards.  The function returns +1 if the corner turns to the
   * left, -1 to the right, and 0 for undecidable cases.
   */
  FT_BASE( FT_Int )
  ft_corner_orientation( FT_Pos  in_x,
                         FT_Pos  in_y,
                         FT_Pos  out_x,
                         FT_Pos  out_y );


  /*
   * Return TRUE if a corner is flat or nearly flat.  This is equivalent
   * to saying that the corner point is close to its neighbors, or inside
   * an ellipse defined by the neighbor focal points to be more precise.
   */
  FT_BASE( FT_Int )
  ft_corner_is_flat( FT_Pos  in_x,
                     FT_Pos  in_y,
                     FT_Pos  out_x,
                     FT_Pos  out_y );


  /*
   * Return the most significant bit index.
   */

#ifndef FT_CONFIG_OPTION_NO_ASSEMBLER

#if defined( __clang__ ) || ( defined( __GNUC__ )                 &&  \
    ( __GNUC__ > 3 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) )

#if FT_SIZEOF_INT == 4

#define FT_MSB( x )  ( 31 - __builtin_clz( x ) )

#elif FT_SIZEOF_LONG == 4

#define FT_MSB( x )  ( 31 - __builtin_clzl( x ) )

#endif

#elif defined( _MSC_VER ) && _MSC_VER >= 1400

#if defined( _WIN32_WCE )

#include <cmnintrin.h>
#pragma intrinsic( _CountLeadingZeros )

#define FT_MSB( x )  ( 31 - _CountLeadingZeros( x ) )

#elif defined( _M_ARM64 ) || defined( _M_ARM )

#include <intrin.h>
#pragma intrinsic( _CountLeadingZeros )

#define FT_MSB( x )  ( 31 - _CountLeadingZeros( x ) )

#elif defined( _M_IX86 ) || defined( _M_AMD64 ) || defined( _M_IA64 )

#include <intrin.h>
#pragma intrinsic( _BitScanReverse )

  static __inline FT_Int32
  FT_MSB_i386( FT_UInt32  x )
  {
    unsigned long  where;


    _BitScanReverse( &where, x );

    return (FT_Int32)where;
  }

#define FT_MSB( x )  FT_MSB_i386( x )

#endif

#elif defined( __WATCOMC__ ) && defined( __386__ )

  extern __inline FT_Int32
  FT_MSB_i386( FT_UInt32  x );

#pragma aux FT_MSB_i386 =             \
  "bsr eax, eax"                      \
  __parm [__eax] __nomemory           \
  __value [__eax]                     \
  __modify __exact [__eax] __nomemory;

#define FT_MSB( x )  FT_MSB_i386( x )

#elif defined( __SunOS_5_11 )

#include <strings.h>

#define FT_MSB( x )  ( fls( x ) - 1 )

#elif defined( __DECC ) || defined( __DECCXX )

#include <builtins.h>

#define FT_MSB( x )  (FT_Int)( 63 - _leadz( x ) )

#elif defined( _CRAYC )

#include <intrinsics.h>

#define FT_MSB( x )  (FT_Int)( 31 - _leadz32( x ) )

#endif /* FT_MSB macro definitions */

#endif /* !FT_CONFIG_OPTION_NO_ASSEMBLER */


#ifndef FT_MSB

  FT_BASE( FT_Int )
  FT_MSB( FT_UInt32  z );

#endif
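  /*
   * Whichever definition of `FT_MSB` is selected above, it returns the
   * zero-based index of the most significant set bit of a non-zero 32-bit
   * value; for example, `FT_MSB( 1 )` is 0 and `FT_MSB( 0x10000 )` is 16.
   * A hedged portable sketch of the same computation (illustration only,
   * not the fallback FreeType actually compiles):
   *
   * ```
   *   static FT_Int
   *   msb_portable( FT_UInt32  z )
   *   {
   *     FT_Int  shift = 0;
   *
   *
   *     while ( z >>= 1 )
   *       shift++;
   *
   *     return shift;
   *   }
   * ```
   *
   * Note that some variants above (e.g., the `__builtin_clz`-based one)
   * have undefined behavior for a zero argument.
   */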
  /*
   * Return sqrt(x*x+y*y), which is the same as `FT_Vector_Length' but
   * uses two fixed-point arguments instead.
   */
  FT_BASE( FT_Fixed )
  FT_Hypot( FT_Fixed  x,
            FT_Fixed  y );


  /**************************************************************************
   *
   * @function:
   *   FT_SqrtFixed
   *
   * @description:
   *   Computes the square root of a 16.16 fixed-point value.
   *
   * @input:
   *   x ::
   *     The value to compute the root for.
   *
   * @return:
   *   The result of 'sqrt(x)'.
   *
   * @note:
   *   This function is slow and should be avoided.  Consider `FT_Hypot'
   *   or `FT_Vector_NormLen' instead.
   */
  FT_BASE( FT_UInt32 )
  FT_SqrtFixed( FT_UInt32  x );


#define INT_TO_F26DOT6( x )    ( (FT_Long)(x) * 64    )  /* << 6  */
#define INT_TO_F2DOT14( x )    ( (FT_Long)(x) * 16384 )  /* << 14 */
#define INT_TO_FIXED( x )      ( (FT_Long)(x) * 65536 )  /* << 16 */
#define F2DOT14_TO_FIXED( x )  ( (FT_Long)(x) * 4     )  /* << 2  */
#define FIXED_TO_INT( x )      ( FT_RoundFix( x ) >> 16 )

#define ROUND_F26DOT6( x )     ( ( (x) + 32 - ( (x) < 0 ) ) & -64 )


  /*
   * The following macros have two purposes.
   *
   * - Tag places where overflow is expected and harmless.
   *
   * - Avoid run-time sanitizer errors.
   *
   * Use with care!
   */
#define ADD_INT( a, b )                           \
          (FT_Int)( (FT_UInt)(a) + (FT_UInt)(b) )
#define SUB_INT( a, b )                           \
          (FT_Int)( (FT_UInt)(a) - (FT_UInt)(b) )
#define MUL_INT( a, b )                           \
          (FT_Int)( (FT_UInt)(a) * (FT_UInt)(b) )
#define NEG_INT( a )                              \
          (FT_Int)( (FT_UInt)0 - (FT_UInt)(a) )

#define ADD_LONG( a, b )                             \
          (FT_Long)( (FT_ULong)(a) + (FT_ULong)(b) )
#define SUB_LONG( a, b )                             \
          (FT_Long)( (FT_ULong)(a) - (FT_ULong)(b) )
#define MUL_LONG( a, b )                             \
          (FT_Long)( (FT_ULong)(a) * (FT_ULong)(b) )
#define NEG_LONG( a )                                \
          (FT_Long)( (FT_ULong)0 - (FT_ULong)(a) )

#define ADD_INT32( a, b )                              \
          (FT_Int32)( (FT_UInt32)(a) + (FT_UInt32)(b) )
#define SUB_INT32( a, b )                              \
          (FT_Int32)( (FT_UInt32)(a) - (FT_UInt32)(b) )
#define MUL_INT32( a, b )                              \
          (FT_Int32)( (FT_UInt32)(a) * (FT_UInt32)(b) )
#define NEG_INT32( a )                                 \
          (FT_Int32)( (FT_UInt32)0 - (FT_UInt32)(a) )

#ifdef FT_INT64

#define ADD_INT64( a, b )                              \
          (FT_Int64)( (FT_UInt64)(a) + (FT_UInt64)(b) )
#define SUB_INT64( a, b )                              \
          (FT_Int64)( (FT_UInt64)(a) - (FT_UInt64)(b) )
#define MUL_INT64( a, b )                              \
          (FT_Int64)( (FT_UInt64)(a) * (FT_UInt64)(b) )
#define NEG_INT64( a )                                 \
          (FT_Int64)( (FT_UInt64)0 - (FT_UInt64)(a) )

#endif /* FT_INT64 */


FT_END_HEADER

#endif /* FTCALC_H_ */


/* END */