| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- /*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
- * See LICENSE file for details. */
- #include "ilog.h"
- #include <limits.h>
- /*The fastest fallback strategy for platforms with fast multiplication appears
- to be based on de Bruijn sequences~\cite{LP98}.
- Tests confirmed this to be true even on an ARM11, where it is actually faster
- than using the native clz instruction.
- Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where
- multiplication or table lookups are too expensive.
- @UNPUBLISHED{LP98,
- author="Charles E. Leiserson and Harald Prokop",
- title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
- month=Jun,
- year=1998,
- note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
- }*/
- static UNNEEDED const unsigned char DEBRUIJN_IDX32[32]={
- 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
- 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
- };
- /* We always compile these in, in case someone takes address of function. */
- #undef ilog32_nz
- #undef ilog32
- #undef ilog64_nz
- #undef ilog64
- int ilog32(uint32_t _v){
- /*On a Pentium M, this branchless version tested as the fastest version without
- multiplications on 1,000,000,000 random 32-bit integers, edging out a
- similar version with branches, and a 256-entry LUT version.*/
- # if defined(ILOG_NODEBRUIJN)
- int ret;
- int m;
- ret=_v>0;
- m=(_v>0xFFFFU)<<4;
- _v>>=m;
- ret|=m;
- m=(_v>0xFFU)<<3;
- _v>>=m;
- ret|=m;
- m=(_v>0xFU)<<2;
- _v>>=m;
- ret|=m;
- m=(_v>3)<<1;
- _v>>=m;
- ret|=m;
- ret+=_v>1;
- return ret;
- /*This de Bruijn sequence version is faster if you have a fast multiplier.*/
- # else
- int ret;
- ret=_v>0;
- _v|=_v>>1;
- _v|=_v>>2;
- _v|=_v>>4;
- _v|=_v>>8;
- _v|=_v>>16;
- _v=(_v>>1)+1;
- ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
- return ret;
- # endif
- }
- int ilog32_nz(uint32_t _v)
- {
- return ilog32(_v);
- }
- int ilog64(uint64_t _v){
- # if defined(ILOG_NODEBRUIJN)
- uint32_t v;
- int ret;
- int m;
- ret=_v>0;
- m=(_v>0xFFFFFFFFU)<<5;
- v=(uint32_t)(_v>>m);
- ret|=m;
- m=(v>0xFFFFU)<<4;
- v>>=m;
- ret|=m;
- m=(v>0xFFU)<<3;
- v>>=m;
- ret|=m;
- m=(v>0xFU)<<2;
- v>>=m;
- ret|=m;
- m=(v>3)<<1;
- v>>=m;
- ret|=m;
- ret+=v>1;
- return ret;
- # else
- /*If we don't have a 64-bit word, split it into two 32-bit halves.*/
- # if LONG_MAX<9223372036854775807LL
- uint32_t v;
- int ret;
- int m;
- ret=_v>0;
- m=(_v>0xFFFFFFFFU)<<5;
- v=(uint32_t)(_v>>m);
- ret|=m;
- v|=v>>1;
- v|=v>>2;
- v|=v>>4;
- v|=v>>8;
- v|=v>>16;
- v=(v>>1)+1;
- ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
- return ret;
- /*Otherwise do it in one 64-bit operation.*/
- # else
- static const unsigned char DEBRUIJN_IDX64[64]={
- 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
- 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
- 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
- 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
- };
- int ret;
- ret=_v>0;
- _v|=_v>>1;
- _v|=_v>>2;
- _v|=_v>>4;
- _v|=_v>>8;
- _v|=_v>>16;
- _v|=_v>>32;
- _v=(_v>>1)+1;
- ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];
- return ret;
- # endif
- # endif
- }
- int ilog64_nz(uint64_t _v)
- {
- return ilog64(_v);
- }
|