alloc.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239
  1. /* Licensed under LGPLv2.1+ - see LICENSE file for details */
  2. #include <unistd.h>
  3. #include <stdint.h>
  4. #include <string.h>
  5. #include <limits.h>
  6. #include <assert.h>
  7. #include <stdlib.h>
  8. #include "alloc.h"
  9. #include "bitops.h"
  10. #include "tiny.h"
  11. #include <ccan/build_assert/build_assert.h>
  12. #include <ccan/likely/likely.h>
  13. #include <ccan/alignof/alignof.h>
  14. #include <ccan/short_types/short_types.h>
  15. #include <ccan/compiler/compiler.h>
  16. #include "config.h"
  17. /*
  18. Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
  19. http://samba.org/~tridge/junkcode/alloc_mmap/
  20. Copyright (C) Andrew Tridgell 2007
  21. This library is free software; you can redistribute it and/or
  22. modify it under the terms of the GNU Lesser General Public
  23. License as published by the Free Software Foundation; either
  24. version 2 of the License, or (at your option) any later version.
  25. This library is distributed in the hope that it will be useful,
  26. but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  28. Lesser General Public License for more details.
  29. You should have received a copy of the GNU Lesser General Public
  30. License along with this library; if not, write to the Free Software
  31. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  32. */
/* We divide the pool into this many large pages (nearest power of 2) */
#define MAX_LARGE_PAGES (256UL)

/* 32 small pages == 1 large page. */
#define BITS_FROM_SMALL_TO_LARGE_PAGE 5

#define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)

/* Smallest pool size for this scheme: 128-byte small pages.  That's
 * 9/13% overhead for 32/64 bit. */
#define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128)

/* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
#define INTER_BUCKET_SPACE 4

/* Number of small pages per large page (32). */
#define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE)

/* FIXME: Figure this out properly. */
#define MAX_SIZE (1 << 30)

/* How few objects fit in a page before we use a larger one? (8) */
#define MAX_PAGE_OBJECT_ORDER 3

/* Bits in an unsigned long: the word size of our bitmaps. */
#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
/* Per-bucket state, stored in the pool header. */
struct bucket_state {
	/* How many objects of this bucket's size fit on one page. */
	u32 elements_per_page;
	/* Page number of first partially-used page (0 = none). */
	u16 page_list;
	/* Page number of first completely-used page (0 = none). */
	u16 full_list;
};

/* Pool header, kept at the very start of the pool. */
struct header {
	/* Bitmap of which pages are large. */
	unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG];

	/* List of unused small/large pages. */
	u16 small_free_list;
	u16 large_free_list;

	/* List of huge allocs: byte offset of first huge_alloc (0 = none). */
	unsigned long huge;

	/* This is less defined: we have two buckets for each power of 2 */
	struct bucket_state bs[1];
};

/* Tracker for one huge (multi-page) allocation.  These live inside
 * ordinary bucket allocations; next/prev/off are byte offsets from the
 * pool header. */
struct huge_alloc {
	unsigned long next, prev;
	unsigned long off, len;
};

/* Header at the front of every bucket page (small or large). */
struct page_header {
	/* Page numbers of neighbours on whichever list this page is on. */
	u16 next, prev;
	/* FIXME: We can just count all-0 and all-1 used[] elements. */
	unsigned elements_used : 25;
	unsigned bucket : 7;
	unsigned long used[1]; /* One bit per element. */
};
  76. /*
  77. * Every 4 buckets, the size doubles.
  78. * Between buckets, sizes increase linearly.
  79. *
 * eg. bucket 40 = 2^10            = 1024
 *     bucket 41 = 2^10 + 2^10*1/4 = 1024 + 256
 *     bucket 42 = 2^10 + 2^10*2/4 = 1024 + 512
 *     bucket 43 = 2^10 + 2^10*3/4 = 1024 + 768
 *     bucket 44 = 2^11            = 2048
  85. *
  86. * Care is taken to handle low numbered buckets, at cost of overflow.
  87. */
  88. static unsigned long bucket_to_size(unsigned int bucket)
  89. {
  90. unsigned long base = 1UL << (bucket / INTER_BUCKET_SPACE);
  91. return base + ((bucket % INTER_BUCKET_SPACE)
  92. << (bucket / INTER_BUCKET_SPACE))
  93. / INTER_BUCKET_SPACE;
  94. }
  95. /*
  96. * Say size is 10.
  97. * fls(size/2) == 3. 1 << 3 == 8, so we're 2 too large, out of a possible
  98. * 8 too large. That's 1/4 of the way to the next power of 2 == 1 bucket.
  99. *
  100. * We make sure we round up. Note that this fails on 32 bit at size
  101. * 1879048193 (around bucket 120).
  102. */
  103. static unsigned int size_to_bucket(unsigned long size)
  104. {
  105. unsigned int base = afls(size/2);
  106. unsigned long overshoot;
  107. overshoot = size - (1UL << base);
  108. return base * INTER_BUCKET_SPACE
  109. + ((overshoot * INTER_BUCKET_SPACE + (1UL << base)-1) >> base);
  110. }
  111. static unsigned int small_page_bits(unsigned long poolsize)
  112. {
  113. return afls(poolsize / MAX_SMALL_PAGES - 1);
  114. }
  115. static struct page_header *from_pgnum(struct header *head,
  116. unsigned long pgnum,
  117. unsigned sp_bits)
  118. {
  119. return (struct page_header *)((char *)head + (pgnum << sp_bits));
  120. }
  121. static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits)
  122. {
  123. return ((char *)p - (char *)head) >> sp_bits;
  124. }
  125. static size_t used_size(unsigned int num_elements)
  126. {
  127. return align_up(num_elements, BITS_PER_LONG) / CHAR_BIT;
  128. }
  129. /*
  130. * We always align the first entry to the lower power of 2.
  131. * eg. the 12-byte bucket gets 8-byte aligned. The 4096-byte bucket
  132. * gets 4096-byte aligned.
  133. */
  134. static unsigned long page_header_size(unsigned int align_bits,
  135. unsigned long num_elements)
  136. {
  137. unsigned long size;
  138. size = sizeof(struct page_header)
  139. - sizeof(((struct page_header *)0)->used)
  140. + used_size(num_elements);
  141. return align_up(size, 1UL << align_bits);
  142. }
  143. static void add_to_list(struct header *head,
  144. u16 *list, struct page_header *ph, unsigned sp_bits)
  145. {
  146. unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits);
  147. ph->next = h;
  148. if (h) {
  149. struct page_header *prev = from_pgnum(head, h, sp_bits);
  150. assert(prev->prev == 0);
  151. prev->prev = offset;
  152. }
  153. *list = offset;
  154. ph->prev = 0;
  155. }
  156. static void del_from_list(struct header *head,
  157. u16 *list, struct page_header *ph, unsigned sp_bits)
  158. {
  159. /* Front of list? */
  160. if (ph->prev == 0) {
  161. *list = ph->next;
  162. } else {
  163. struct page_header *prev = from_pgnum(head, ph->prev, sp_bits);
  164. prev->next = ph->next;
  165. }
  166. if (ph->next != 0) {
  167. struct page_header *next = from_pgnum(head, ph->next, sp_bits);
  168. next->prev = ph->prev;
  169. }
  170. }
  171. static u16 pop_from_list(struct header *head,
  172. u16 *list,
  173. unsigned int sp_bits)
  174. {
  175. u16 h = *list;
  176. struct page_header *ph = from_pgnum(head, h, sp_bits);
  177. if (likely(h)) {
  178. *list = ph->next;
  179. if (*list)
  180. from_pgnum(head, *list, sp_bits)->prev = 0;
  181. }
  182. return h;
  183. }
  184. static void add_to_huge_list(struct header *head, struct huge_alloc *ha)
  185. {
  186. unsigned long h = head->huge;
  187. unsigned long offset = (char *)ha - (char *)head;
  188. ha->next = h;
  189. if (h) {
  190. struct huge_alloc *prev = (void *)((char *)head + h);
  191. assert(prev->prev == 0);
  192. prev->prev = offset;
  193. }
  194. head->huge = offset;
  195. ha->prev = 0;
  196. }
  197. static void del_from_huge(struct header *head, struct huge_alloc *ha)
  198. {
  199. /* Front of list? */
  200. if (ha->prev == 0) {
  201. head->huge = ha->next;
  202. } else {
  203. struct huge_alloc *prev = (void *)((char *)head + ha->prev);
  204. prev->next = ha->next;
  205. }
  206. if (ha->next != 0) {
  207. struct huge_alloc *next = (void *)((char *)head + ha->next);
  208. next->prev = ha->prev;
  209. }
  210. }
/* Put a small page back on the pool-wide small free list. */
static void add_small_page_to_freelist(struct header *head,
				       struct page_header *ph,
				       unsigned int sp_bits)
{
	add_to_list(head, &head->small_free_list, ph, sp_bits);
}

/* Put a large page back on the pool-wide large free list. */
static void add_large_page_to_freelist(struct header *head,
				       struct page_header *ph,
				       unsigned int sp_bits)
{
	add_to_list(head, &head->large_free_list, ph, sp_bits);
}

/* Add a partially-used page to its bucket's page list. */
static void add_to_bucket_list(struct header *head,
			       struct bucket_state *bs,
			       struct page_header *ph,
			       unsigned int sp_bits)
{
	add_to_list(head, &bs->page_list, ph, sp_bits);
}

/* Remove a page from its bucket's (partially-used) page list. */
static void del_from_bucket_list(struct header *head,
				 struct bucket_state *bs,
				 struct page_header *ph,
				 unsigned int sp_bits)
{
	del_from_list(head, &bs->page_list, ph, sp_bits);
}

/* Remove a page from its bucket's full-page list. */
static void del_from_bucket_full_list(struct header *head,
				      struct bucket_state *bs,
				      struct page_header *ph,
				      unsigned int sp_bits)
{
	del_from_list(head, &bs->full_list, ph, sp_bits);
}

/* Add a completely-used page to its bucket's full-page list. */
static void add_to_bucket_full_list(struct header *head,
				    struct bucket_state *bs,
				    struct page_header *ph,
				    unsigned int sp_bits)
{
	add_to_list(head, &bs->full_list, ph, sp_bits);
}
/* Clear bit 'off' in the bitmap. */
static void clear_bit(unsigned long bitmap[], unsigned int off)
{
	bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG));
}

/* Is bit 'off' set in the bitmap? */
static bool test_bit(const unsigned long bitmap[], unsigned int off)
{
	return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG));
}

/* Set bit 'off' in the bitmap. */
static void set_bit(unsigned long bitmap[], unsigned int off)
{
	bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG));
}

/* Index of the first clear bit.  There must be a bit to be found. */
static unsigned int find_free_bit(const unsigned long bitmap[])
{
	unsigned int i;

	/* Skip words that are completely full... */
	for (i = 0; bitmap[i] == -1UL; i++);
	/* ...then find the lowest clear bit (affsl counts from 1, hence -1). */
	return (i*BITS_PER_LONG) + affsl(~bitmap[i]) - 1;
}
  270. /* How many elements can we fit in a page? */
  271. static unsigned long elements_per_page(unsigned long align_bits,
  272. unsigned long esize,
  273. unsigned long psize)
  274. {
  275. unsigned long num, overhead;
  276. /* First approximation: no extra room for bitmap. */
  277. overhead = align_up(sizeof(struct page_header), 1UL << align_bits);
  278. num = (psize - overhead) / esize;
  279. while (page_header_size(align_bits, num) + esize * num > psize)
  280. num--;
  281. return num;
  282. }
  283. static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits)
  284. {
  285. unsigned long max_smallsize;
  286. /* Note: this doesn't take into account page header. */
  287. max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
  288. return bucket_to_size(bucket) > max_smallsize;
  289. }
  290. static unsigned int max_bucket(unsigned int lp_bits)
  291. {
  292. return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
  293. }
  294. void alloc_init(void *pool, unsigned long poolsize)
  295. {
  296. struct header *head = pool;
  297. struct page_header *ph;
  298. unsigned int lp_bits, sp_bits, num_buckets;
  299. unsigned long header_size, i;
  300. if (poolsize < MIN_USEFUL_SIZE) {
  301. tiny_alloc_init(pool, poolsize);
  302. return;
  303. }
  304. /* We rely on page numbers fitting in 16 bit. */
  305. BUILD_ASSERT(MAX_SMALL_PAGES < 65536);
  306. sp_bits = small_page_bits(poolsize);
  307. lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
  308. num_buckets = max_bucket(lp_bits);
  309. head = pool;
  310. header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
  311. memset(head, 0, header_size);
  312. for (i = 0; i < num_buckets; i++) {
  313. unsigned long pagesize;
  314. if (large_page_bucket(i, sp_bits))
  315. pagesize = 1UL << lp_bits;
  316. else
  317. pagesize = 1UL << sp_bits;
  318. head->bs[i].elements_per_page
  319. = elements_per_page(i / INTER_BUCKET_SPACE,
  320. bucket_to_size(i),
  321. pagesize);
  322. }
  323. /* They start as all large pages. */
  324. memset(head->pagesize, 0xFF, sizeof(head->pagesize));
  325. /* FIXME: small pages for last bit? */
  326. /* Split first page into small pages. */
  327. assert(header_size < (1UL << lp_bits));
  328. clear_bit(head->pagesize, 0);
  329. /* Skip over page(s) used by header, add rest to free list */
  330. for (i = align_up(header_size, (1UL << sp_bits)) >> sp_bits;
  331. i < SMALL_PAGES_PER_LARGE_PAGE;
  332. i++) {
  333. ph = from_pgnum(head, i, sp_bits);
  334. ph->elements_used = 0;
  335. add_small_page_to_freelist(head, ph, sp_bits);
  336. }
  337. /* Add the rest of the pages as large pages. */
  338. i = SMALL_PAGES_PER_LARGE_PAGE;
  339. while ((i << sp_bits) + (1UL << lp_bits) <= poolsize) {
  340. assert(i < MAX_SMALL_PAGES);
  341. ph = from_pgnum(head, i, sp_bits);
  342. ph->elements_used = 0;
  343. add_large_page_to_freelist(head, ph, sp_bits);
  344. i += SMALL_PAGES_PER_LARGE_PAGE;
  345. }
  346. }
  347. /* A large page worth of small pages are free: delete them from free list. */
  348. static void del_large_from_small_free_list(struct header *head,
  349. struct page_header *ph,
  350. unsigned int sp_bits)
  351. {
  352. unsigned long i;
  353. for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
  354. del_from_list(head, &head->small_free_list,
  355. (struct page_header *)((char *)ph
  356. + (i << sp_bits)),
  357. sp_bits);
  358. }
  359. }
  360. static bool all_empty(struct header *head,
  361. unsigned long pgnum,
  362. unsigned sp_bits)
  363. {
  364. unsigned long i;
  365. for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
  366. struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits);
  367. if (ph->elements_used)
  368. return false;
  369. }
  370. return true;
  371. }
  372. static void recombine_small_pages(struct header *head, unsigned long poolsize,
  373. unsigned int sp_bits)
  374. {
  375. unsigned long i;
  376. unsigned int lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
  377. /* Look for small pages to coalesce, after first large page. */
  378. for (i = SMALL_PAGES_PER_LARGE_PAGE;
  379. i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE;
  380. i += SMALL_PAGES_PER_LARGE_PAGE) {
  381. /* Already a large page? */
  382. if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE))
  383. continue;
  384. if (all_empty(head, i, sp_bits)) {
  385. struct page_header *ph = from_pgnum(head, i, sp_bits);
  386. set_bit(head->pagesize,
  387. i / SMALL_PAGES_PER_LARGE_PAGE);
  388. del_large_from_small_free_list(head, ph, sp_bits);
  389. add_large_page_to_freelist(head, ph, sp_bits);
  390. }
  391. }
  392. }
  393. static u16 get_large_page(struct header *head, unsigned long poolsize,
  394. unsigned int sp_bits)
  395. {
  396. unsigned int page;
  397. page = pop_from_list(head, &head->large_free_list, sp_bits);
  398. if (likely(page))
  399. return page;
  400. recombine_small_pages(head, poolsize, sp_bits);
  401. return pop_from_list(head, &head->large_free_list, sp_bits);
  402. }
  403. /* Returns small page. */
  404. static unsigned long break_up_large_page(struct header *head,
  405. unsigned int sp_bits,
  406. u16 lpage)
  407. {
  408. unsigned int i;
  409. clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE);
  410. for (i = 1; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
  411. struct page_header *ph = from_pgnum(head, lpage + i, sp_bits);
  412. /* Initialize this: huge_alloc reads it. */
  413. ph->elements_used = 0;
  414. add_small_page_to_freelist(head, ph, sp_bits);
  415. }
  416. return lpage;
  417. }
  418. static u16 get_small_page(struct header *head, unsigned long poolsize,
  419. unsigned int sp_bits)
  420. {
  421. u16 ret;
  422. ret = pop_from_list(head, &head->small_free_list, sp_bits);
  423. if (likely(ret))
  424. return ret;
  425. ret = get_large_page(head, poolsize, sp_bits);
  426. if (likely(ret))
  427. ret = break_up_large_page(head, sp_bits, ret);
  428. return ret;
  429. }
  430. static bool huge_allocated(struct header *head, unsigned long offset)
  431. {
  432. unsigned long i;
  433. struct huge_alloc *ha;
  434. for (i = head->huge; i; i = ha->next) {
  435. ha = (void *)((char *)head + i);
  436. if (ha->off <= offset && ha->off + ha->len > offset)
  437. return true;
  438. }
  439. return false;
  440. }
/* They want something really big.  Aim for contiguous pages (slow).
 *
 * Two passes: first look for a contiguous, suitably-aligned run of free
 * small pages; failing that, recombine and look for a run of free large
 * pages.  A huge_alloc tracker (itself a normal bucket allocation) is
 * allocated up front and freed again if neither pass succeeds. */
static COLD void *huge_alloc(void *pool, unsigned long poolsize,
			     unsigned long size, unsigned long align)
{
	struct header *head = pool;
	struct huge_alloc *ha;
	unsigned long i, sp_bits, lp_bits, num, header_size;

	sp_bits = small_page_bits(poolsize);
	lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;

	/* Allocate tracking structure optimistically. */
	ha = alloc_get(pool, poolsize, sizeof(*ha), ALIGNOF(*ha));
	if (!ha)
		return NULL;

	/* First search for contiguous small pages... */
	header_size = sizeof(*head) + sizeof(head->bs) * (max_bucket(lp_bits)-1);

	/* num counts the length of the current run of usable pages;
	 * i starts at the first page past the pool header. */
	num = 0;
	for (i = (header_size + (1UL << sp_bits) - 1) >> sp_bits;
	     i << sp_bits < poolsize;
	     i++) {
		struct page_header *pg;
		unsigned long off = (i << sp_bits);

		/* Skip over large pages. */
		if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
			/* NOTE(review): num is not reset here, so a run
			 * could appear to span the skipped large page —
			 * verify this cannot happen in practice. */
			i += (1UL << BITS_FROM_SMALL_TO_LARGE_PAGE)-1;
			continue;
		}

		/* Does this page meet alignment requirements? */
		if (!num && off % align != 0)
			continue;

		/* FIXME: This makes us O(n^2). */
		if (huge_allocated(head, off)) {
			num = 0;
			continue;
		}

		pg = (struct page_header *)((char *)head + off);
		if (pg->elements_used) {
			num = 0;
			continue;
		}

		num++;
		if (num << sp_bits >= size) {
			unsigned long pgnum;

			/* Remove from free list. */
			for (pgnum = i; pgnum > i - num; pgnum--) {
				pg = from_pgnum(head, pgnum, sp_bits);
				del_from_list(head,
					      &head->small_free_list,
					      pg, sp_bits);
			}
			ha->off = (i - num + 1) << sp_bits;
			ha->len = num << sp_bits;
			goto done;
		}
	}

	/* Now search for large pages... */
	recombine_small_pages(head, poolsize, sp_bits);

	num = 0;
	for (i = (header_size + (1UL << lp_bits) - 1) >> lp_bits;
	     (i << lp_bits) < poolsize; i++) {
		struct page_header *pg;
		unsigned long off = (i << lp_bits);

		/* Ignore small pages. */
		if (!test_bit(head->pagesize, i))
			continue;

		/* Does this page meet alignment requirements? */
		if (!num && off % align != 0)
			continue;

		/* FIXME: This makes us O(n^2). */
		if (huge_allocated(head, off)) {
			num = 0;
			continue;
		}

		pg = (struct page_header *)((char *)head + off);
		if (pg->elements_used) {
			num = 0;
			continue;
		}

		num++;
		if (num << lp_bits >= size) {
			unsigned long pgnum;

			/* Remove from free list. */
			for (pgnum = i; pgnum > i - num; pgnum--) {
				pg = from_pgnum(head, pgnum, lp_bits);
				del_from_list(head,
					      &head->large_free_list,
					      pg, sp_bits);
			}
			ha->off = (i - num + 1) << lp_bits;
			ha->len = num << lp_bits;
			goto done;
		}
	}

	/* Unable to satisfy: free huge alloc structure. */
	alloc_free(pool, poolsize, ha);
	return NULL;

done:
	add_to_huge_list(pool, ha);
	return (char *)pool + ha->off;
}
/* Free a huge allocation: return all its pages to the appropriate free
 * list, then unlink and free the tracking structure. */
static COLD void
huge_free(struct header *head, unsigned long poolsize, void *free)
{
	unsigned long i, off, pgnum, free_off = (char *)free - (char *)head;
	unsigned int sp_bits, lp_bits;
	struct huge_alloc *ha;

	/* Find the tracker whose offset matches this pointer. */
	for (i = head->huge; i; i = ha->next) {
		ha = (void *)((char *)head + i);
		if (free_off == ha->off)
			break;
	}
	/* i == 0 means we walked off the list: bogus free. */
	assert(i);

	/* Free up all the pages, delete and free ha */
	sp_bits = small_page_bits(poolsize);
	lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
	pgnum = free_off >> sp_bits;

	if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
		/* Allocation was carved from large pages. */
		for (off = ha->off;
		     off < ha->off + ha->len;
		     off += 1UL << lp_bits) {
			add_large_page_to_freelist(head,
						   (void *)((char *)head + off),
						   sp_bits);
		}
	} else {
		/* Allocation was carved from small pages. */
		for (off = ha->off;
		     off < ha->off + ha->len;
		     off += 1UL << sp_bits) {
			add_small_page_to_freelist(head,
						   (void *)((char *)head + off),
						   sp_bits);
		}
	}
	del_from_huge(head, ha);
	alloc_free(head, poolsize, ha);
}
  576. static COLD unsigned long huge_size(struct header *head, void *p)
  577. {
  578. unsigned long i, off = (char *)p - (char *)head;
  579. struct huge_alloc *ha;
  580. for (i = head->huge; i; i = ha->next) {
  581. ha = (void *)((char *)head + i);
  582. if (off == ha->off) {
  583. return ha->len;
  584. }
  585. }
  586. abort();
  587. }
  588. void *alloc_get(void *pool, unsigned long poolsize,
  589. unsigned long size, unsigned long align)
  590. {
  591. struct header *head = pool;
  592. unsigned int bucket;
  593. unsigned long i;
  594. struct bucket_state *bs;
  595. struct page_header *ph;
  596. unsigned int sp_bits;
  597. if (poolsize < MIN_USEFUL_SIZE) {
  598. return tiny_alloc_get(pool, poolsize, size, align);
  599. }
  600. size = align_up(size, align);
  601. if (unlikely(!size))
  602. size = 1;
  603. bucket = size_to_bucket(size);
  604. sp_bits = small_page_bits(poolsize);
  605. if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) {
  606. return huge_alloc(pool, poolsize, size, align);
  607. }
  608. bs = &head->bs[bucket];
  609. if (!bs->page_list) {
  610. struct page_header *ph;
  611. if (large_page_bucket(bucket, sp_bits))
  612. bs->page_list = get_large_page(head, poolsize,
  613. sp_bits);
  614. else
  615. bs->page_list = get_small_page(head, poolsize,
  616. sp_bits);
  617. /* FIXME: Try large-aligned alloc? Header stuffing? */
  618. if (unlikely(!bs->page_list))
  619. return NULL;
  620. ph = from_pgnum(head, bs->page_list, sp_bits);
  621. ph->bucket = bucket;
  622. ph->elements_used = 0;
  623. ph->next = 0;
  624. memset(ph->used, 0, used_size(bs->elements_per_page));
  625. }
  626. ph = from_pgnum(head, bs->page_list, sp_bits);
  627. i = find_free_bit(ph->used);
  628. set_bit(ph->used, i);
  629. ph->elements_used++;
  630. /* check if this page is now full */
  631. if (unlikely(ph->elements_used == bs->elements_per_page)) {
  632. del_from_bucket_list(head, bs, ph, sp_bits);
  633. add_to_bucket_full_list(head, bs, ph, sp_bits);
  634. }
  635. return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
  636. bs->elements_per_page)
  637. + i * bucket_to_size(bucket);
  638. }
  639. void alloc_free(void *pool, unsigned long poolsize, void *free)
  640. {
  641. struct header *head = pool;
  642. struct bucket_state *bs;
  643. unsigned int sp_bits;
  644. unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool;
  645. bool smallpage;
  646. struct page_header *ph;
  647. if (poolsize < MIN_USEFUL_SIZE) {
  648. tiny_alloc_free(pool, poolsize, free);
  649. return;
  650. }
  651. /* Get page header. */
  652. sp_bits = small_page_bits(poolsize);
  653. pgnum = offset >> sp_bits;
  654. /* Big page? Round down further. */
  655. if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
  656. smallpage = false;
  657. pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
  658. } else
  659. smallpage = true;
  660. /* Step back to page header. */
  661. ph = from_pgnum(head, pgnum, sp_bits);
  662. if ((void *)ph == free) {
  663. huge_free(head, poolsize, free);
  664. return;
  665. }
  666. bs = &head->bs[ph->bucket];
  667. pgoffset = offset - (pgnum << sp_bits)
  668. - page_header_size(ph->bucket / INTER_BUCKET_SPACE,
  669. bs->elements_per_page);
  670. if (unlikely(ph->elements_used == bs->elements_per_page)) {
  671. del_from_bucket_full_list(head, bs, ph, sp_bits);
  672. add_to_bucket_list(head, bs, ph, sp_bits);
  673. }
  674. /* Which element are we? */
  675. i = pgoffset / bucket_to_size(ph->bucket);
  676. clear_bit(ph->used, i);
  677. ph->elements_used--;
  678. if (unlikely(ph->elements_used == 0)) {
  679. bs = &head->bs[ph->bucket];
  680. del_from_bucket_list(head, bs, ph, sp_bits);
  681. if (smallpage)
  682. add_small_page_to_freelist(head, ph, sp_bits);
  683. else
  684. add_large_page_to_freelist(head, ph, sp_bits);
  685. }
  686. }
  687. unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
  688. {
  689. struct header *head = pool;
  690. unsigned int pgnum, sp_bits;
  691. unsigned long offset = (char *)p - (char *)pool;
  692. struct page_header *ph;
  693. if (poolsize < MIN_USEFUL_SIZE)
  694. return tiny_alloc_size(pool, poolsize, p);
  695. /* Get page header. */
  696. sp_bits = small_page_bits(poolsize);
  697. pgnum = offset >> sp_bits;
  698. /* Big page? Round down further. */
  699. if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE))
  700. pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
  701. /* Step back to page header. */
  702. ph = from_pgnum(head, pgnum, sp_bits);
  703. if ((void *)ph == p)
  704. return huge_size(head, p);
  705. return bucket_to_size(ph->bucket);
  706. }
/* Useful for gdb breakpoints: every consistency failure funnels through
 * here, so one breakpoint catches them all. */
static bool check_fail(void)
{
	return false;
}
  712. static unsigned long count_bits(const unsigned long bitmap[],
  713. unsigned long limit)
  714. {
  715. unsigned long i, count = 0;
  716. while (limit >= BITS_PER_LONG) {
  717. count += popcount(bitmap[0]);
  718. bitmap++;
  719. limit -= BITS_PER_LONG;
  720. }
  721. for (i = 0; i < limit; i++)
  722. if (test_bit(bitmap, i))
  723. count++;
  724. return count;
  725. }
  726. static bool out_of_bounds(unsigned long pgnum,
  727. unsigned int sp_bits,
  728. unsigned long pagesize,
  729. unsigned long poolsize)
  730. {
  731. if (((pgnum << sp_bits) >> sp_bits) != pgnum)
  732. return true;
  733. if ((pgnum << sp_bits) > poolsize)
  734. return true;
  735. return ((pgnum << sp_bits) + pagesize > poolsize);
  736. }
  737. static bool check_bucket(struct header *head,
  738. unsigned long poolsize,
  739. unsigned long pages[],
  740. struct bucket_state *bs,
  741. unsigned int bindex)
  742. {
  743. bool lp_bucket;
  744. struct page_header *ph;
  745. unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
  746. sp_bits = small_page_bits(poolsize);
  747. lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
  748. lp_bucket = large_page_bucket(bindex, sp_bits);
  749. pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
  750. /* This many elements fit? */
  751. taken = page_header_size(bindex / INTER_BUCKET_SPACE,
  752. bs->elements_per_page);
  753. taken += bucket_to_size(bindex) * bs->elements_per_page;
  754. if (taken > pagesize)
  755. return check_fail();
  756. /* One more wouldn't fit? */
  757. taken = page_header_size(bindex / INTER_BUCKET_SPACE,
  758. bs->elements_per_page + 1);
  759. taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
  760. if (taken <= pagesize)
  761. return check_fail();
  762. /* Walk used list. */
  763. prev = 0;
  764. for (i = bs->page_list; i; i = ph->next) {
  765. /* Bad pointer? */
  766. if (out_of_bounds(i, sp_bits, pagesize, poolsize))
  767. return check_fail();
  768. /* Wrong size page? */
  769. if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
  770. != lp_bucket)
  771. return check_fail();
  772. /* Large page not on boundary? */
  773. if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
  774. return check_fail();
  775. ph = from_pgnum(head, i, sp_bits);
  776. /* Linked list corrupt? */
  777. if (ph->prev != prev)
  778. return check_fail();
  779. /* Already seen this page? */
  780. if (test_bit(pages, i))
  781. return check_fail();
  782. set_bit(pages, i);
  783. /* Empty or full? */
  784. if (ph->elements_used == 0)
  785. return check_fail();
  786. if (ph->elements_used >= bs->elements_per_page)
  787. return check_fail();
  788. /* Used bits don't agree? */
  789. if (ph->elements_used != count_bits(ph->used,
  790. bs->elements_per_page))
  791. return check_fail();
  792. /* Wrong bucket? */
  793. if (ph->bucket != bindex)
  794. return check_fail();
  795. prev = i;
  796. }
  797. /* Walk full list. */
  798. prev = 0;
  799. for (i = bs->full_list; i; i = ph->next) {
  800. /* Bad pointer? */
  801. if (out_of_bounds(i, sp_bits, pagesize, poolsize))
  802. return check_fail();
  803. /* Wrong size page? */
  804. if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
  805. != lp_bucket)
  806. /* Large page not on boundary? */
  807. if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
  808. return check_fail();
  809. ph = from_pgnum(head, i, sp_bits);
  810. /* Linked list corrupt? */
  811. if (ph->prev != prev)
  812. return check_fail();
  813. /* Already seen this page? */
  814. if (test_bit(pages, i))
  815. return check_fail();
  816. set_bit(pages, i);
  817. /* Not full? */
  818. if (ph->elements_used != bs->elements_per_page)
  819. return check_fail();
  820. /* Used bits don't agree? */
  821. if (ph->elements_used != count_bits(ph->used,
  822. bs->elements_per_page))
  823. return check_fail();
  824. /* Wrong bucket? */
  825. if (ph->bucket != bindex)
  826. return check_fail();
  827. prev = i;
  828. }
  829. return true;
  830. }
/* Full consistency check of a pool.  Walks the free lists, every bucket,
 * and the huge-allocation list, marking each small page seen in a local
 * bitmap; finally verifies that every page in the pool was accounted for
 * exactly once.  Returns true if the pool is consistent. */
bool alloc_check(void *pool, unsigned long poolsize)
{
	struct header *head = pool;
	unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
	struct page_header *ph;
	struct huge_alloc *ha;
	/* One bit per small page: set once a page is accounted for. */
	unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 };

	/* Pools below MIN_USEFUL_SIZE use the tiny allocator instead. */
	if (poolsize < MIN_USEFUL_SIZE)
		return tiny_alloc_check(pool, poolsize);

	sp_bits = small_page_bits(poolsize);
	lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;

	num_buckets = max_bucket(lp_bits);
	header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);

	/* First, set all bits taken by header. */
	for (i = 0; i < header_size; i += (1UL << sp_bits))
		set_bit(pages, i >> sp_bits);

	/* Check small page free list.  i is a small-page number here. */
	prev = 0;
	for (i = head->small_free_list; i; i = ph->next) {
		/* Bad pointer? */
		if (out_of_bounds(i, sp_bits, 1UL << sp_bits, poolsize))
			return check_fail();
		/* Large page? */
		if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
			return check_fail();
		ph = from_pgnum(head, i, sp_bits);
		/* Linked list corrupt? */
		if (ph->prev != prev)
			return check_fail();
		/* Already seen this page? */
		if (test_bit(pages, i))
			return check_fail();
		set_bit(pages, i);
		prev = i;
	}

	/* Check large page free list. */
	prev = 0;
	for (i = head->large_free_list; i; i = ph->next) {
		/* Bad pointer? */
		if (out_of_bounds(i, sp_bits, 1UL << lp_bits, poolsize))
			return check_fail();
		/* Not large page? */
		if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
			return check_fail();
		/* Not page boundary? */
		if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
			return check_fail();
		ph = from_pgnum(head, i, sp_bits);
		/* Linked list corrupt? */
		if (ph->prev != prev)
			return check_fail();
		/* Already seen this page?  (Only the first small page of a
		 * large page is marked here.) */
		if (test_bit(pages, i))
			return check_fail();
		set_bit(pages, i);
		prev = i;
	}

	/* Check the buckets. */
	for (i = 0; i < max_bucket(lp_bits); i++) {
		struct bucket_state *bs = &head->bs[i];

		if (!check_bucket(head, poolsize, pages, bs, i))
			return false;
	}

	/* Check the huge alloc list.  NOTE: i is a byte offset from head
	 * here, not a page number. */
	prev = 0;
	for (i = head->huge; i; i = ha->next) {
		unsigned long pgbits, j;

		/* Bad pointer? */
		if (i >= poolsize || i + sizeof(*ha) > poolsize)
			return check_fail();
		ha = (void *)((char *)head + i);

		/* Check contents of ha. */
		if (ha->off > poolsize || ha->off + ha->len > poolsize)
			return check_fail();

		/* Large or small page?  pagesize is indexed by large-page
		 * number, i.e. byte offset >> lp_bits. */
		pgbits = test_bit(head->pagesize, ha->off >> lp_bits)
			? lp_bits : sp_bits;

		/* Not page boundary? */
		if ((ha->off % (1UL << pgbits)) != 0)
			return check_fail();
		/* Not page length? */
		if ((ha->len % (1UL << pgbits)) != 0)
			return check_fail();

		/* Linked list corrupt? */
		if (ha->prev != prev)
			return check_fail();

		/* Mark every small page the huge alloc spans. */
		for (j = ha->off; j < ha->off + ha->len; j += (1UL<<sp_bits)) {
			/* Already seen this page? */
			if (test_bit(pages, j >> sp_bits))
				return check_fail();
			set_bit(pages, j >> sp_bits);
		}
		prev = i;
	}

	/* Make sure every page accounted for. */
	for (i = 0; i < poolsize >> sp_bits; i++) {
		if (!test_bit(pages, i))
			return check_fail();
		if (test_bit(head->pagesize,
			     i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
			/* Large page, skip rest. */
			i += SMALL_PAGES_PER_LARGE_PAGE - 1;
		}
	}

	return true;
}
  937. static unsigned long print_overhead(FILE *out, const char *desc,
  938. unsigned long bytes,
  939. unsigned long poolsize)
  940. {
  941. fprintf(out, "Overhead (%s): %lu bytes (%.3g%%)\n",
  942. desc, bytes, 100.0 * bytes / poolsize);
  943. return bytes;
  944. }
  945. static unsigned long count_list(struct header *head,
  946. u16 pgnum,
  947. unsigned int sp_bits,
  948. unsigned long *total_elems)
  949. {
  950. struct page_header *p;
  951. unsigned long ret = 0;
  952. while (pgnum) {
  953. p = from_pgnum(head, pgnum, sp_bits);
  954. if (total_elems)
  955. (*total_elems) += p->elements_used;
  956. ret++;
  957. pgnum = p->next;
  958. }
  959. return ret;
  960. }
  961. static unsigned long visualize_bucket(FILE *out, struct header *head,
  962. unsigned int bucket,
  963. unsigned long poolsize,
  964. unsigned int sp_bits)
  965. {
  966. unsigned long num_full, num_partial, num_pages, page_size,
  967. elems, hdr_min, hdr_size, elems_per_page, overhead = 0;
  968. elems_per_page = head->bs[bucket].elements_per_page;
  969. /* If we used byte-based bitmaps, we could get pg hdr to: */
  970. hdr_min = sizeof(struct page_header)
  971. - sizeof(((struct page_header *)0)->used)
  972. + align_up(elems_per_page, CHAR_BIT) / CHAR_BIT;
  973. hdr_size = page_header_size(bucket / INTER_BUCKET_SPACE,
  974. elems_per_page);
  975. elems = 0;
  976. num_full = count_list(head, head->bs[bucket].full_list, sp_bits,
  977. &elems);
  978. num_partial = count_list(head, head->bs[bucket].page_list, sp_bits,
  979. &elems);
  980. num_pages = num_full + num_partial;
  981. if (!num_pages)
  982. return 0;
  983. fprintf(out, "Bucket %u (%lu bytes):"
  984. " %lu full, %lu partial = %lu elements\n",
  985. bucket, bucket_to_size(bucket), num_full, num_partial, elems);
  986. /* Strict requirement of page header size. */
  987. overhead += print_overhead(out, "page headers",
  988. hdr_min * num_pages, poolsize);
  989. /* Gap between minimal page header and actual start. */
  990. overhead += print_overhead(out, "page post-header alignments",
  991. (hdr_size - hdr_min) * num_pages, poolsize);
  992. /* Between last element and end of page. */
  993. page_size = (1UL << sp_bits);
  994. if (large_page_bucket(bucket, sp_bits))
  995. page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE;
  996. overhead += print_overhead(out, "page tails",
  997. (page_size - (hdr_size
  998. + (elems_per_page
  999. * bucket_to_size(bucket))))
  1000. * num_pages, poolsize);
  1001. return overhead;
  1002. }
  1003. void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
  1004. {
  1005. struct header *head = pool;
  1006. unsigned long i, lp_bits, sp_bits, header_size, num_buckets, count,
  1007. overhead = 0;
  1008. fprintf(out, "Pool %p size %lu: (%s allocator)\n", pool, poolsize,
  1009. poolsize < MIN_USEFUL_SIZE ? "tiny" : "standard");
  1010. if (poolsize < MIN_USEFUL_SIZE) {
  1011. tiny_alloc_visualize(out, pool, poolsize);
  1012. return;
  1013. }
  1014. sp_bits = small_page_bits(poolsize);
  1015. lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
  1016. num_buckets = max_bucket(lp_bits);
  1017. header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
  1018. fprintf(out, "Large page size %lu, small page size %lu.\n",
  1019. 1UL << lp_bits, 1UL << sp_bits);
  1020. overhead += print_overhead(out, "unused pool tail",
  1021. poolsize % (1UL << lp_bits), poolsize);
  1022. fprintf(out, "Main header %lu bytes (%lu small pages).\n",
  1023. header_size, align_up(header_size, 1UL << sp_bits) >> sp_bits);
  1024. overhead += print_overhead(out, "partial header page",
  1025. align_up(header_size, 1UL << sp_bits)
  1026. - header_size, poolsize);
  1027. /* Total large pages. */
  1028. i = count_bits(head->pagesize, poolsize >> lp_bits);
  1029. /* Used pages. */
  1030. count = i - count_list(head, head->large_free_list, sp_bits, NULL);
  1031. fprintf(out, "%lu/%lu large pages used (%.3g%%)\n",
  1032. count, i, count ? 100.0 * count / i : 0.0);
  1033. /* Total small pages. */
  1034. i = ((poolsize >> lp_bits) - i) << BITS_FROM_SMALL_TO_LARGE_PAGE;
  1035. /* Used pages */
  1036. count = i - count_list(head, head->small_free_list, sp_bits, NULL);
  1037. fprintf(out, "%lu/%lu small pages used (%.3g%%)\n",
  1038. count, i, count ? 100.0 * count / i : 0.0);
  1039. /* Summary of each bucket. */
  1040. fprintf(out, "%lu buckets:\n", num_buckets);
  1041. for (i = 0; i < num_buckets; i++)
  1042. overhead += visualize_bucket(out, head, i, poolsize, sp_bits);
  1043. print_overhead(out, "total", overhead, poolsize);
  1044. }