From f924e2bbf798a791dba3f60f03aa69658d0d3850 Mon Sep 17 00:00:00 2001 From: Tomas Frydrych Date: Fri, 19 Jan 2007 16:04:06 +0000 Subject: [PATCH] more fixed point work --- ChangeLog | 16 +++ clutter/clutter-alpha.c | 4 +- clutter/clutter-behaviour-path.c | 6 +- clutter/clutter-fixed.c | 163 ++++++++++++++++++++----------- clutter/clutter-fixed.h | 15 ++- 5 files changed, 136 insertions(+), 68 deletions(-) diff --git a/ChangeLog b/ChangeLog index f4b5fd115..a6fcc3f7b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2007-01-19 Tomas Frydrych + + * clutter/clutter-fixed.h: + * clutter/clutter-fixed.c: + Added fast double to int and double to fixed point conversion + routines; changed CLUTTER_FLOAT_TO_FIXED to use it. + Replaced clutter_sqrti with fixed point implementation of the QIII + algorithm. + + + * clutter/clutter-behaviour-path.c: use clutter_sqrti always + + * clutter/clutter-alpha.c: + (sinc_func): replaced double -> int cast with CLUTTER_FLOAT_TO_INT + + 2007-01-18 Emmanuele Bassi * configure.ac: Post release bump to 0.3.0. 
diff --git a/clutter/clutter-alpha.c b/clutter/clutter-alpha.c index 70978309d..c4c272131 100644 --- a/clutter/clutter-alpha.c +++ b/clutter/clutter-alpha.c @@ -519,7 +519,6 @@ sincx1024_func (ClutterAlpha *alpha, return CLUTTER_FIXED_INT (sine * CLUTTER_ALPHA_MAX_ALPHA); } - #if 0 /* * The following two functions are left in place for reference @@ -570,9 +569,10 @@ sinc_func (ClutterAlpha *alpha, CLUTTER_NOTE (ALPHA, "sine: %2f\n", sine); - return (guint32) (sine * (gdouble) CLUTTER_ALPHA_MAX_ALPHA); + return CLUTTER_FLOAT_TO_INT ((sine * (gdouble) CLUTTER_ALPHA_MAX_ALPHA)); } #endif + /** * clutter_sine_func: * @alpha: a #ClutterAlpha diff --git a/clutter/clutter-behaviour-path.c b/clutter/clutter-behaviour-path.c index ba7d9ddf4..ecee1c8c1 100644 --- a/clutter/clutter-behaviour-path.c +++ b/clutter/clutter-behaviour-path.c @@ -196,12 +196,12 @@ node_distance (const ClutterKnot *begin, if (clutter_knot_equal (begin, end)) return 0; -#ifdef CFX_NO_FPU +#if 1 return clutter_sqrti ((end->x - begin->x) * (end->x - begin->x) + (end->y - begin->y) * (end->y - begin->y)); #else - return (gint) sqrt ((end->x - begin->x) * (end->x - begin->x) + - (end->y - begin->y) * (end->y - begin->y)); + return CLUTTER_FLOAT_TO_INT(sqrt((end->x - begin->x) * (end->x - begin->x) + + (end->y - begin->y) * (end->y - begin->y))); #endif } diff --git a/clutter/clutter-fixed.c b/clutter/clutter-fixed.c index b174795ac..92d37d7ae 100644 --- a/clutter/clutter-fixed.c +++ b/clutter/clutter-fixed.c @@ -353,14 +353,6 @@ clutter_sqrtx (ClutterFixed x) * on ARM this function is about 5 times faster than c-lib sqrt, whilst * producing errors < 1%. * - * (There are faster algorithm's available; the Carmack 'magic' - * algorithm, http://www.codemaestro.com/reviews/review00000105.html, - * is about five times faster than this one when implemented - * as fixed point, but it's error is much greater and grows with the - * size of the argument (reaches about 10% around x == 800). 
- * - * Note: on systems with FPU, the clib sqrt can be noticeably faster - * than this function. */ int t = 0; int sh = 0; @@ -448,68 +440,121 @@ clutter_sqrtx (ClutterFixed x) * clutter_sqrti: * @x: integer value * - * A fixed point implementation of square root for integers + * Very fast fixed point implementation of square root for integers. + * + * This function is about 10x faster than clib sqrt() on x86, and (this is + * not a typo!) more than 800x faster on ARM without FPU. Its error is < 5% + * for arguments < 132 and < 10% for arguments < 5591. * - * Return value: integer square root (truncated). + * Return value: integer square root. * * * Since: 0.2 */ gint -clutter_sqrti (gint x) +clutter_sqrti (gint number) { - int t = 0; - int sh = 0; - unsigned int mask = 0x40000000; + /* This is a fixed point implementation of the Quake III sqrt algorithm, + * described, for example, at + * http://www.codemaestro.com/reviews/review00000105.html + * + * While the original QIII is extremely fast, the use of floating division + * and multiplication makes it perform very poorly on arm processors without FPU. + * + * The key to successfully replacing the floating point operations with + * fixed point is in the choice of the fixed point format. The QIII + * algorithm does not calculate the square root, but its reciprocal ('y' + * below), which is only at the end turned to the inverse value. In order + * for the algorithm to produce satisfactory results, the reciprocal value + * must be represented with sufficient precision; the 16.16 we use + * elsewhere in clutter is not good enough, and 10.22 is used instead. 
+ */ + ClutterFixed x; + unsigned long y, y1; /* 10.22 fixed point */ + unsigned long f = 0x600000; /* '1.5' as 10.22 fixed */ + float flt = number; + float flt2; - if (x <= 0) - return 0; + x = CLUTTER_INT_TO_FIXED (number) / 2; - if (x > (sizeof (sqrt_tbl)/sizeof(ClutterFixed) - 1)) - { - /* - * Find the highest bit set - */ -#if __arm__ - /* This actually requires at least arm v5, but gcc does not seem - * to set the architecture defines correctly, and it is probably - * very unlikely that anyone will want to use clutter on anything - * less than v5. - */ - int bit; - __asm__ ("clz %0, %1\n" - "rsb %0, %0, #31\n" - :"=r"(bit) - :"r" (x)); + /* The QIII initial estimate */ + y = * ( unsigned long * ) &flt; + y = 0x5f3759df - ( y >> 1 ); + flt = * ( float * ) &y; - /* make even (2n) */ - bit &= 0xfffffffe; -#else - /* TODO -- add i386 branch using bshr */ - int bit = 30; - while (bit >= 0) - { - if (x & mask) - break; + /* Now, we convert the float to 10.22 fixed. We exploit the mechanism + * described at http://www.d6.com/users/checker/pdfs/gdmfp.pdf. + * + * We want 22 bit fraction; a single precision float uses 23 bit + * mantissa, so we only need to add 2^(23-22) (no need for the 1.5 + * multiplier as we are only dealing with positive numbers). + * + * Note: we have to use two separate variables here -- for some reason, + * if we try to use just the flt variable, gcc on ARM optimises the whole + * addition out, and it all goes pear-shaped, since without it, the bits + * in the float will not be correctly aligned. 
+ */ + flt2 = flt + 2.0; + y = * ( long * ) &flt2; + y &= 0x7FFFFF; - mask = (mask >> 1 | mask >> 2); - bit -= 2; - } -#endif - sh = ((bit - 6) >> 1); - t = (x >> (bit - 6)); - } - else - { - return (sqrt_tbl[x] >> CFX_Q); - } + /* Now we correct the estimate, only a single iteration is needed */ + y1 = (y >> 11) * (y >> 11); + y1 = (y1 >> 8) * (x >> 8); - x = sqrt_tbl[t]; + y1 = f - y1; + y = (y >> 11) * (y1 >> 11); - if (sh > 0) - x = x << sh; - else if (sh < 0) - x = (x >> (1 + ~sh)); - - return (x >> CFX_Q); + /* Invert, round and convert from 10.22 to an integer + * 0x1e3c68 is a magical rounding constant that produces slightly + * better results than 0x200000. + */ + return (number * y + 0x1e3c68) >> 22; } + +/* */ +const double _magic = 68719476736.0*1.5; + +/* Where in the 64 bits of double is the mantissa */ +#ifdef LITTLE_ENDIAN +#define _CFX_MAN 0 +#else +#define _CFX_MAN 1 +#endif + +/* + * _clutter_double_to_fixed : + * @val: value to be converted + * + * A fast conversion from double precision floating point to fixed point + * + * Return value: Fixed point representation of the value + * + * Since: 0.2 + */ +ClutterFixed +_clutter_double_to_fixed (double val) +{ + val = val + _magic; + return ((gint32*)&val)[_CFX_MAN]; +} + +/* + * _clutter_double_to_int : + * @val: value to be converted + * + * A fast conversion from double precision floating point to int; + * use this instead of casting double/float to int. 
+ * + * Return value: Integer part of the double + * + * Since: 0.2 + */ +ClutterFixed +_clutter_double_to_int (double val) +{ + val = val + _magic; + return ((gint32*)&val)[_CFX_MAN] >> 16; +} + +#undef _CFX_MAN diff --git a/clutter/clutter-fixed.h b/clutter/clutter-fixed.h index 206ac62d3..2eca15689 100644 --- a/clutter/clutter-fixed.h +++ b/clutter/clutter-fixed.h @@ -118,10 +118,9 @@ typedef gint32 ClutterAngle; /* angle such that 1024 == 2*PI */ #define CLUTTER_FIXED_TO_FLOAT(x) ((float) ((int)(x) / 65536.0)) #define CLUTTER_FIXED_TO_DOUBLE(x) ((double) ((int)(x) / 65536.0)) -#define CLUTTER_FLOAT_TO_FIXED(x) \ - ( (ABS(x) > 32767.0) ? (((x) / (x)) * 0x7fffffff) \ - : ((long)((x) * 65536.0 + ((x) < 0 ? -0.5 \ - : 0.5))) ) +#define CLUTTER_FLOAT_TO_FIXED(x) _clutter_double_to_fixed((x)) +#define CLUTTER_FLOAT_TO_INT(x) _clutter_double_to_int((x)) + #define CLUTTER_INT_TO_FIXED(x) ((x) << CFX_Q) #define CLUTTER_FIXED_INT(x) ((x) >> CFX_Q) @@ -180,6 +179,14 @@ ClutterFixed clutter_sini (ClutterAngle angle); ClutterFixed clutter_sqrtx (ClutterFixed x); gint clutter_sqrti (gint x); + +/* */ +extern inline +ClutterFixed _clutter_double_to_fixed (double value); + +extern inline +ClutterFixed _clutter_double_to_int (double value); + G_END_DECLS #endif