ilog.c 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
  2. * See LICENSE file for details. */
  3. #include "ilog.h"
  4. #include <limits.h>
  5. /*The fastest fallback strategy for platforms with fast multiplication appears
  6. to be based on de Bruijn sequences~\cite{LP98}.
  7. Tests confirmed this to be true even on an ARM11, where it is actually faster
  8. than using the native clz instruction.
  9. Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where
  10. multiplication or table lookups are too expensive.
  11. @UNPUBLISHED{LP98,
  12. author="Charles E. Leiserson and Harald Prokop",
  13. title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
  14. month=Jun,
  15. year=1998,
  16. note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
  17. }*/
  18. static UNNEEDED const unsigned char DEBRUIJN_IDX32[32]={
  19. 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
  20. 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
  21. };
  22. /* We always compile these in, in case someone takes address of function. */
  23. #undef ilog32_nz
  24. #undef ilog32
  25. #undef ilog64_nz
  26. #undef ilog64
  27. int ilog32(uint32_t _v){
  28. /*On a Pentium M, this branchless version tested as the fastest version without
  29. multiplications on 1,000,000,000 random 32-bit integers, edging out a
  30. similar version with branches, and a 256-entry LUT version.*/
  31. # if defined(ILOG_NODEBRUIJN)
  32. int ret;
  33. int m;
  34. ret=_v>0;
  35. m=(_v>0xFFFFU)<<4;
  36. _v>>=m;
  37. ret|=m;
  38. m=(_v>0xFFU)<<3;
  39. _v>>=m;
  40. ret|=m;
  41. m=(_v>0xFU)<<2;
  42. _v>>=m;
  43. ret|=m;
  44. m=(_v>3)<<1;
  45. _v>>=m;
  46. ret|=m;
  47. ret+=_v>1;
  48. return ret;
  49. /*This de Bruijn sequence version is faster if you have a fast multiplier.*/
  50. # else
  51. int ret;
  52. ret=_v>0;
  53. _v|=_v>>1;
  54. _v|=_v>>2;
  55. _v|=_v>>4;
  56. _v|=_v>>8;
  57. _v|=_v>>16;
  58. _v=(_v>>1)+1;
  59. ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
  60. return ret;
  61. # endif
  62. }
  63. int ilog32_nz(uint32_t _v)
  64. {
  65. return ilog32(_v);
  66. }
  67. int ilog64(uint64_t _v){
  68. # if defined(ILOG_NODEBRUIJN)
  69. uint32_t v;
  70. int ret;
  71. int m;
  72. ret=_v>0;
  73. m=(_v>0xFFFFFFFFU)<<5;
  74. v=(uint32_t)(_v>>m);
  75. ret|=m;
  76. m=(v>0xFFFFU)<<4;
  77. v>>=m;
  78. ret|=m;
  79. m=(v>0xFFU)<<3;
  80. v>>=m;
  81. ret|=m;
  82. m=(v>0xFU)<<2;
  83. v>>=m;
  84. ret|=m;
  85. m=(v>3)<<1;
  86. v>>=m;
  87. ret|=m;
  88. ret+=v>1;
  89. return ret;
  90. # else
  91. /*If we don't have a 64-bit word, split it into two 32-bit halves.*/
  92. # if LONG_MAX<9223372036854775807LL
  93. uint32_t v;
  94. int ret;
  95. int m;
  96. ret=_v>0;
  97. m=(_v>0xFFFFFFFFU)<<5;
  98. v=(uint32_t)(_v>>m);
  99. ret|=m;
  100. v|=v>>1;
  101. v|=v>>2;
  102. v|=v>>4;
  103. v|=v>>8;
  104. v|=v>>16;
  105. v=(v>>1)+1;
  106. ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
  107. return ret;
  108. /*Otherwise do it in one 64-bit operation.*/
  109. # else
  110. static const unsigned char DEBRUIJN_IDX64[64]={
  111. 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
  112. 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
  113. 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
  114. 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
  115. };
  116. int ret;
  117. ret=_v>0;
  118. _v|=_v>>1;
  119. _v|=_v>>2;
  120. _v|=_v>>4;
  121. _v|=_v>>8;
  122. _v|=_v>>16;
  123. _v|=_v>>32;
  124. _v=(_v>>1)+1;
  125. ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];
  126. return ret;
  127. # endif
  128. # endif
  129. }
  130. int ilog64_nz(uint64_t _v)
  131. {
  132. return ilog64(_v);
  133. }