alloc.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. #include <unistd.h>
  2. #include <stdint.h>
  3. #include <string.h>
  4. #include <limits.h>
  5. #include <assert.h>
  6. #include "alloc.h"
  7. #include "build_assert/build_assert.h"
  8. #include "config.h"
#if HAVE_ALIGNOF
/* Compiler-provided alignment (GCC/Clang extension). */
#define ALIGNOF(t) __alignof__(t)
#else
/* Alignment by measuring structure padding. */
#define ALIGNOF(t) (sizeof(struct { char c; t _h; }) - 1 - sizeof(t))
#endif

/* FIXME: Doesn't handle non-page-aligned poolsize. */
/* FIXME: Reduce. */
/* Smallest pool we can manage: page-state bits + metadata need a page,
 * and we want at least one usable page after that. */
#define MIN_SIZE (getpagesize() * 2)

/* What's the granularity of sub-page allocs? */
#define BITMAP_GRANULARITY 4

/* File layout:
 *
 *  file := pagestates pad metadata
 *  pagestates := pages * 2-bits-per-page
 *  pad := pad to next ALIGNOF(metadata)
 *
 *  metadata := metalen next-ptr metabits
 *  metabits := freeblock | bitblock
 *  freeblock := 0+
 *  bitblock := 2-bits-per-bit-in-page 1
 */
/* Header of one metadata region; regions form a singly-linked list of
 * pool-relative offsets starting at first_mheader(). */
struct metaheader
{
	/* Length (after this header). (FIXME: Could be in pages). */
	unsigned long metalen;
	/* Next meta header (byte offset from pool start), or 0 for end. */
	unsigned long next;
	/* Bits start here. */
};
/* Two state bits are kept per page (and per sub-page bitmap slot). */
#define BITS_PER_PAGE 2

/* FIXME: Don't use page states for bitblock. It's tacky and confusing. */
enum page_state
{
	FREE,		/* Unallocated. */
	TAKEN,		/* Continuation of an allocation started earlier. */
	TAKEN_START,	/* First page (or granule) of an allocation. */
	SUBPAGE,	/* Page carved into BITMAP_GRANULARITY sub-allocations. */
};
  48. /* Assumes a is a power of two. */
  49. static unsigned long align_up(unsigned long x, unsigned long a)
  50. {
  51. return (x + a - 1) & ~(a - 1);
  52. }
  53. static unsigned long div_up(unsigned long x, unsigned long a)
  54. {
  55. return (x + a - 1) / a;
  56. }
/* The offset of metadata for a subpage allocation is found at the end
 * of the subpage (the last unsigned long of the page). */
#define SUBPAGE_METAOFF (getpagesize() - sizeof(unsigned long))

/* This is the length of metadata in bits.  It consists of two bits
 * for every BITMAP_GRANULARITY of usable bytes in the page, then two
 * bits for the TAKEN tailer. */
#define BITMAP_METABITLEN						\
	((div_up(SUBPAGE_METAOFF, BITMAP_GRANULARITY) + 1) * BITS_PER_PAGE)

/* This is the length in bytes. */
#define BITMAP_METALEN (div_up(BITMAP_METABITLEN, CHAR_BIT))
  67. static enum page_state get_page_state(const uint8_t *bits, unsigned long page)
  68. {
  69. return bits[page * 2 / CHAR_BIT] >> (page * 2 % CHAR_BIT) & 3;
  70. }
  71. static void set_page_state(uint8_t *bits, unsigned long page, enum page_state s)
  72. {
  73. bits[page * 2 / CHAR_BIT] &= ~(3 << (page * 2 % CHAR_BIT));
  74. bits[page * 2 / CHAR_BIT] |= ((uint8_t)s << (page * 2 % CHAR_BIT));
  75. }
  76. static struct metaheader *first_mheader(void *pool, unsigned long poolsize)
  77. {
  78. unsigned int pagestatelen;
  79. pagestatelen = align_up(div_up(poolsize/getpagesize() * BITS_PER_PAGE,
  80. CHAR_BIT),
  81. ALIGNOF(struct metaheader));
  82. return (struct metaheader *)((char *)pool + pagestatelen);
  83. }
  84. static struct metaheader *next_mheader(void *pool, struct metaheader *mh)
  85. {
  86. if (!mh->next)
  87. return NULL;
  88. return (struct metaheader *)((char *)pool + mh->next);
  89. }
  90. static unsigned long pool_offset(void *pool, void *p)
  91. {
  92. return (char *)p - (char *)pool;
  93. }
/* Initialize an empty pool: zero the page-state bits, lay down the first
 * metaheader right after them, and mark the page(s) holding this
 * bookkeeping as allocated.  Pools below MIN_SIZE are silently ignored
 * (alloc_get on them returns NULL). */
void alloc_init(void *pool, unsigned long poolsize)
{
	/* FIXME: Alignment assumptions about pool. */
	unsigned long len, i;
	struct metaheader *mh;

	if (poolsize < MIN_SIZE)
		return;

	mh = first_mheader(pool, poolsize);

	/* len covers all page states, plus the metaheader. */
	len = (char *)(mh + 1) - (char *)pool;

	/* Mark all page states FREE (relies on FREE being the zero state). */
	BUILD_ASSERT(FREE == 0);
	memset(pool, 0, len);

	/* metaheader len takes us up to next page boundary. */
	mh->metalen = align_up(len, getpagesize()) - len;

	/* Mark the pagestate and metadata page(s) allocated. */
	set_page_state(pool, 0, TAKEN_START);
	for (i = 1; i < div_up(len, getpagesize()); i++)
		set_page_state(pool, i, TAKEN);
}
/* Two bits per element, representing page states. Returns 0 on fail.
 * Finds a run of `want` consecutive FREE elements whose start index is a
 * multiple of `align` (align == 0 means no constraint), marks it as
 * TAKEN_START followed by TAKENs, and returns the start index.
 * Index 0 doubles as the failure sentinel, so element 0 is never used. */
static unsigned long alloc_from_bitmap(uint8_t *bits, unsigned long elems,
				       unsigned long want, unsigned long align)
{
	long i;
	unsigned long free;

	free = 0;
	/* We allocate from far end, to increase ability to expand metadata. */
	for (i = elems - 1; i >= 0; i--) {
		switch (get_page_state(bits, i)) {
		case FREE:
			if (++free >= want) {
				unsigned long j;
				/* They might ask for large alignment.
				 * Keep scanning downward; `free` stays valid
				 * because the free run only grows. */
				if (align && i % align)
					continue;
				for (j = i+1; j < i + want; j++)
					set_page_state(bits, j, TAKEN);
				set_page_state(bits, i, TAKEN_START);
				return i;
			}
			break;
		case SUBPAGE:
		case TAKEN_START:
		case TAKEN:
			/* Run broken: restart the count. */
			free = 0;
			break;
		}
	}
	return 0;
}
  145. static unsigned long alloc_get_pages(void *pool, unsigned long poolsize,
  146. unsigned long pages, unsigned long align)
  147. {
  148. long i;
  149. unsigned long free;
  150. free = 0;
  151. /* We allocate from far end, to increase ability to expand metadata. */
  152. for (i = poolsize / getpagesize() - 1; i >= 0; i--) {
  153. switch (get_page_state(pool, i)) {
  154. case FREE:
  155. if (++free >= pages) {
  156. unsigned long j, addr;
  157. addr = (unsigned long)pool + i * getpagesize();
  158. /* They might ask for multi-page alignment. */
  159. if (addr % align)
  160. continue;
  161. for (j = i+1; j < i + pages; j++)
  162. set_page_state(pool, j, TAKEN);
  163. set_page_state(pool, i, TAKEN_START);
  164. return i;
  165. }
  166. break;
  167. case SUBPAGE:
  168. case TAKEN_START:
  169. case TAKEN:
  170. free = 0;
  171. break;
  172. }
  173. }
  174. return 0;
  175. }
  176. /* Offset to metadata is at end of page. */
  177. static unsigned long *metadata_off(void *pool, unsigned long page)
  178. {
  179. return (unsigned long *)
  180. ((char *)pool + (page+1)*getpagesize() - sizeof(unsigned long));
  181. }
  182. static uint8_t *get_page_metadata(void *pool, unsigned long page)
  183. {
  184. return (uint8_t *)pool + *metadata_off(pool, page);
  185. }
  186. static void set_page_metadata(void *pool, unsigned long page, uint8_t *meta)
  187. {
  188. *metadata_off(pool, page) = meta - (uint8_t *)pool;
  189. }
/* Allocate `size` bytes (alignment `align`) from within SUBPAGE page
 * `page`, using its two-bit-per-granule bitmap.  Returns NULL when the
 * page has no suitable free run. */
static void *sub_page_alloc(void *pool, unsigned long page,
			    unsigned long size, unsigned long align)
{
	uint8_t *bits = get_page_metadata(pool, page);
	unsigned long i;

	/* TAKEN at end means a bitwise alloc. */
	assert(get_page_state(bits, getpagesize()/BITMAP_GRANULARITY - 1)
	       == TAKEN);

	/* Our bits are the same as the page bits. */
	i = alloc_from_bitmap(bits, SUBPAGE_METAOFF/BITMAP_GRANULARITY,
			      div_up(size, BITMAP_GRANULARITY),
			      align / BITMAP_GRANULARITY);

	/* Can't allocate?  (Index 0 doubles as the failure sentinel, so the
	 * first granule of every SUBPAGE page is never handed out.) */
	if (i == 0)
		return NULL;

	return (char *)pool + page*getpagesize() + i*BITMAP_GRANULARITY;
}
/* Allocate `bytes` bytes of metadata space inside metaheader `mh`.
 * Metaspace usage is tracked with the same two-bit states as pages,
 * scanned from the top down; a TAKEN state marks the END of an
 * allocated metablock.  Returns NULL when no free run is big enough. */
static uint8_t *alloc_metaspace(struct metaheader *mh, unsigned long bytes)
{
	uint8_t *meta = (uint8_t *)(mh + 1);
	unsigned long free = 0, len;
	long i;

	/* TAKEN tags end a subpage alloc. */
	for (i = mh->metalen * CHAR_BIT / BITS_PER_PAGE - 1; i >= 0; i -= len) {
		switch (get_page_state(meta, i)) {
		case FREE:
			len = 1;
			free++;
			/* bytes of metadata occupy
			 * bytes * CHAR_BIT / BITS_PER_PAGE two-bit states. */
			if (free == bytes * CHAR_BIT / BITS_PER_PAGE) {
				/* TAKEN marks end of metablock. */
				set_page_state(meta, i + free - 1, TAKEN);
				/* NOTE(review): if i is not a multiple of
				 * CHAR_BIT/BITS_PER_PAGE, this byte pointer
				 * is rounded down to the byte containing
				 * state i — confirm callers tolerate that. */
				return meta + i / (CHAR_BIT / BITS_PER_PAGE);
			}
			break;
		case TAKEN:
			/* Skip over this allocated part. */
			/* NOTE(review): assumes every metablock is exactly
			 * BITMAP_METALEN bytes; true for current callers,
			 * which only allocate subpage bitmaps. */
			len = BITMAP_METALEN * CHAR_BIT / BITS_PER_PAGE;
			free = 0;
			break;
		default:
			/* Metaspace states are only ever FREE or TAKEN. */
			assert(0);
			return NULL;
		}
	}
	return NULL;
}
/* We need this many bytes of metadata.  Strategy:
 *   1. try every existing metaheader's free metaspace,
 *   2. try extending a metaheader into the FREE page right after it,
 *   3. otherwise allocate fresh page(s) and link in a new metaheader.
 * Returns NULL when the pool cannot hold the metadata.
 * NOTE(review): in step 3 metalen is set to one page's worth even though
 * div_up(bytes, pagesize) pages were grabbed — confirm bytes never
 * exceeds getpagesize() - sizeof(struct metaheader). */
static uint8_t *new_metadata(void *pool, unsigned long poolsize,
			     unsigned long bytes)
{
	struct metaheader *mh, *newmh;
	unsigned long page;

	for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
		uint8_t *meta = alloc_metaspace(mh, bytes);

		if (meta)
			return meta;
	}

	/* No room for metadata?  Can we expand an existing one? */
	for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
		unsigned long nextpage;

		/* It should end on a page boundary. */
		nextpage = pool_offset(pool, (char *)(mh + 1) + mh->metalen);
		assert(nextpage % getpagesize() == 0);

		/* Now, can we grab that page? */
		if (get_page_state(pool, nextpage / getpagesize()) != FREE)
			continue;

		/* OK, expand metadata, do it again. */
		set_page_state(pool, nextpage / getpagesize(), TAKEN);
		BUILD_ASSERT(FREE == 0);
		memset((char *)pool + nextpage, 0, getpagesize());
		mh->metalen += getpagesize();
		return alloc_metaspace(mh, bytes);
	}

	/* No metadata left at all? */
	page = alloc_get_pages(pool, poolsize, div_up(bytes, getpagesize()), 1);
	if (!page)
		return NULL;

	newmh = (struct metaheader *)((char *)pool + page * getpagesize());
	newmh->metalen = getpagesize() - sizeof(*mh);
	BUILD_ASSERT(FREE == 0);
	memset(newmh + 1, 0, newmh->metalen);

	/* Sew it into linked list, right after the first header. */
	mh = first_mheader(pool,poolsize);
	newmh->next = mh->next;
	mh->next = (char *)newmh - (char *)pool;

	return alloc_metaspace(newmh, bytes);
}
  277. static void alloc_free_pages(void *pool, unsigned long pagenum)
  278. {
  279. assert(get_page_state(pool, pagenum) == TAKEN_START);
  280. set_page_state(pool, pagenum, FREE);
  281. while (get_page_state(pool, ++pagenum) == TAKEN)
  282. set_page_state(pool, pagenum, FREE);
  283. }
  284. static void *alloc_sub_page(void *pool, unsigned long poolsize,
  285. unsigned long size, unsigned long align)
  286. {
  287. unsigned long i;
  288. uint8_t *metadata;
  289. /* Look for partial page. */
  290. for (i = 0; i < poolsize / getpagesize(); i++) {
  291. void *ret;
  292. if (get_page_state(pool, i) != SUBPAGE)
  293. continue;
  294. ret = sub_page_alloc(pool, i, size, align);
  295. if (ret)
  296. return ret;
  297. }
  298. /* Create new SUBPAGE page. */
  299. i = alloc_get_pages(pool, poolsize, 1, 1);
  300. if (i == 0)
  301. return NULL;
  302. /* Get metadata for page. */
  303. metadata = new_metadata(pool, poolsize, BITMAP_METALEN);
  304. if (!metadata) {
  305. alloc_free_pages(pool, i);
  306. return NULL;
  307. }
  308. /* Actually, this is a SUBPAGE page now. */
  309. set_page_state(pool, i, SUBPAGE);
  310. /* Set metadata pointer for page. */
  311. set_page_metadata(pool, i, metadata);
  312. /* Do allocation like normal */
  313. return sub_page_alloc(pool, i, size, align);
  314. }
  315. void *alloc_get(void *pool, unsigned long poolsize,
  316. unsigned long size, unsigned long align)
  317. {
  318. if (poolsize < MIN_SIZE)
  319. return NULL;
  320. /* Sub-page allocations have an overhead of 25%. */
  321. if (size + size/4 >= getpagesize() || align >= getpagesize()) {
  322. unsigned long ret, pages = div_up(size, getpagesize());
  323. ret = alloc_get_pages(pool, poolsize, pages, align);
  324. if (ret == 0)
  325. return NULL;
  326. return (char *)pool + ret * getpagesize();
  327. }
  328. return alloc_sub_page(pool, poolsize, size, align);
  329. }
  330. static void subpage_free(void *pool, unsigned long pagenum, void *free)
  331. {
  332. unsigned long off = (unsigned long)free % getpagesize();
  333. uint8_t *metadata;
  334. assert(off < SUBPAGE_METAOFF);
  335. assert(off % BITMAP_GRANULARITY == 0);
  336. metadata = get_page_metadata(pool, pagenum);
  337. off /= BITMAP_GRANULARITY;
  338. set_page_state(metadata, off++, FREE);
  339. while (off < SUBPAGE_METAOFF / BITMAP_GRANULARITY
  340. && get_page_state(metadata, off) == TAKEN)
  341. set_page_state(metadata, off++, FREE);
  342. /* FIXME: If whole page free, free page and metadata. */
  343. }
/* Free a pointer previously returned by alloc_get.  Freeing NULL is a
 * no-op.  Sub-page pointers are released via their page's bitmap;
 * whole-page pointers must be page-aligned. */
void alloc_free(void *pool, unsigned long poolsize, void *free)
{
	unsigned long pagenum;
	struct metaheader *mh;

	if (!free)
		return;

	assert(poolsize >= MIN_SIZE);

	mh = first_mheader(pool, poolsize);
	/* The pointer must lie past the first metadata region and
	 * strictly inside the pool. */
	assert((char *)free >= (char *)(mh + 1) + mh->metalen);
	assert((char *)pool + poolsize > (char *)free);

	pagenum = pool_offset(pool, free) / getpagesize();

	if (get_page_state(pool, pagenum) == SUBPAGE)
		subpage_free(pool, pagenum, free);
	else {
		/* Whole-page allocations start on a page boundary. */
		assert((unsigned long)free % getpagesize() == 0);
		alloc_free_pages(pool, pagenum);
	}
}
  362. static bool is_metadata_page(void *pool, unsigned long poolsize,
  363. unsigned long page)
  364. {
  365. struct metaheader *mh;
  366. for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
  367. unsigned long start, end;
  368. start = pool_offset(pool, mh);
  369. end = pool_offset(pool, (char *)(mh+1) + mh->metalen);
  370. if (page >= start/getpagesize() && page < end/getpagesize())
  371. return true;
  372. }
  373. return false;
  374. }
/* Validate one SUBPAGE page: its stored metadata offset must be inside
 * the pool, aligned like a metaheader, point into a metadata page, and
 * carry the TAKEN end marker; the bitmap itself must contain no SUBPAGE
 * state and no TAKEN continuation that doesn't follow an allocation. */
static bool check_subpage(void *pool, unsigned long poolsize,
			  unsigned long page)
{
	unsigned long *mhoff = metadata_off(pool, page);
	unsigned int i;
	enum page_state last_state = FREE;

	/* Offset must leave room for a header inside the pool... */
	if (*mhoff + sizeof(struct metaheader) > poolsize)
		return false;

	/* ...and be aligned like a metaheader. */
	if (*mhoff % ALIGNOF(struct metaheader) != 0)
		return false;

	/* It must point to a metadata page. */
	if (!is_metadata_page(pool, poolsize, *mhoff / getpagesize()))
		return false;

	/* Marker at end of subpage allocation is "taken" */
	if (get_page_state((uint8_t *)pool + *mhoff,
			   getpagesize()/BITMAP_GRANULARITY - 1) != TAKEN)
		return false;

	for (i = 0; i < SUBPAGE_METAOFF / BITMAP_GRANULARITY; i++) {
		enum page_state state;

		state = get_page_state((uint8_t *)pool + *mhoff, i);
		switch (state) {
		case SUBPAGE:
			/* SUBPAGE never appears inside a bitmap. */
			return false;
		case TAKEN:
			/* A continuation needs a preceding granule. */
			if (last_state == FREE)
				return false;
			break;
		default:
			break;
		}
		last_state = state;
	}
	return true;
}
/* Consistency-check the whole pool: the metaheader chain's bounds and
 * alignment first, then every page's two-bit state.  Returns true when
 * everything is consistent. */
bool alloc_check(void *pool, unsigned long poolsize)
{
	unsigned long i;
	struct metaheader *mh;
	enum page_state last_state = FREE;
	bool was_metadata = false;

	/* A pool too small to initialize is trivially consistent. */
	if (poolsize < MIN_SIZE)
		return true;

	/* Page 0 always holds the page-state bits: must be TAKEN_START. */
	if (get_page_state(pool, 0) != TAKEN_START)
		return false;

	/* First check metadata pages. */
	/* Metadata pages will be marked TAKEN. */
	for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
		unsigned long start, end;

		start = pool_offset(pool, mh);
		if (start + sizeof(*mh) > poolsize)
			return false;

		end = pool_offset(pool, (char *)(mh+1) + mh->metalen);
		if (end > poolsize)
			return false;

		/* Non-first pages should start on a page boundary. */
		if (mh != first_mheader(pool, poolsize)
		    && start % getpagesize() != 0)
			return false;

		/* It should end on a page boundary. */
		if (end % getpagesize() != 0)
			return false;
	}

	for (i = 0; i < poolsize / getpagesize(); i++) {
		enum page_state state = get_page_state(pool, i);
		bool is_metadata = is_metadata_page(pool, poolsize,i);

		switch (state) {
		case FREE:
			/* metadata pages are never free. */
			if (is_metadata)
				return false;
			/* fallthrough — FREE needs no further checks. */
		case TAKEN_START:
			break;
		case TAKEN:
			/* This should continue a previous block. */
			if (last_state == FREE)
				return false;
			/* A run is either all metadata or none of it. */
			if (is_metadata != was_metadata)
				return false;
			break;
		case SUBPAGE:
			/* Check metadata pointer etc. */
			if (!check_subpage(pool, poolsize, i))
				return false;
		}
		last_state = state;
		was_metadata = is_metadata;
	}
	return true;
}
  464. void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
  465. {
  466. struct metaheader *mh;
  467. unsigned long pagebitlen, metadata_pages, count[1<<BITS_PER_PAGE], tot;
  468. long i;
  469. if (poolsize < MIN_SIZE) {
  470. fprintf(out, "Pool smaller than %u: no content\n", MIN_SIZE);
  471. return;
  472. }
  473. memset(count, 0, sizeof(count));
  474. for (i = 0; i < poolsize / getpagesize(); i++)
  475. count[get_page_state(pool, i)]++;
  476. mh = first_mheader(pool, poolsize);
  477. pagebitlen = (char *)mh - (char *)pool;
  478. fprintf(out, "%lu bytes of page bits: FREE/TAKEN/TAKEN_START/SUBPAGE = %lu/%lu/%lu/%lu\n",
  479. pagebitlen, count[0], count[1], count[2], count[3]);
  480. /* One metadata page for every page of page bits. */
  481. metadata_pages = div_up(pagebitlen, getpagesize());
  482. /* Now do each metadata page. */
  483. for (; mh; mh = next_mheader(pool,mh)) {
  484. unsigned long free = 0, subpageblocks = 0, len = 0;
  485. uint8_t *meta = (uint8_t *)(mh + 1);
  486. metadata_pages += (sizeof(*mh) + mh->metalen) / getpagesize();
  487. /* TAKEN tags end a subpage alloc. */
  488. for (i = mh->metalen * CHAR_BIT/BITS_PER_PAGE - 1;
  489. i >= 0;
  490. i -= len) {
  491. switch (get_page_state(meta, i)) {
  492. case FREE:
  493. len = 1;
  494. free++;
  495. break;
  496. case TAKEN:
  497. /* Skip over this allocated part. */
  498. len = BITMAP_METALEN * CHAR_BIT;
  499. subpageblocks++;
  500. break;
  501. default:
  502. assert(0);
  503. }
  504. }
  505. fprintf(out, "Metadata %lu-%lu: %lu free, %lu subpageblocks, %lu%% density\n",
  506. pool_offset(pool, mh),
  507. pool_offset(pool, (char *)(mh+1) + mh->metalen),
  508. free, subpageblocks,
  509. subpageblocks * BITMAP_METALEN * 100
  510. / (free + subpageblocks * BITMAP_METALEN));
  511. }
  512. /* Account for total pages allocated. */
  513. tot = (count[1] + count[2] - metadata_pages) * getpagesize();
  514. fprintf(out, "Total metadata bytes = %lu\n",
  515. metadata_pages * getpagesize());
  516. /* Now do every subpage. */
  517. for (i = 0; i < poolsize / getpagesize(); i++) {
  518. uint8_t *meta;
  519. unsigned int j;
  520. if (get_page_state(pool, i) != SUBPAGE)
  521. continue;
  522. memset(count, 0, sizeof(count));
  523. meta = get_page_metadata(pool, i);
  524. for (j = 0; j < SUBPAGE_METAOFF/BITMAP_GRANULARITY; j++)
  525. count[get_page_state(meta, j)]++;
  526. fprintf(out, "Subpage %lu: "
  527. "FREE/TAKEN/TAKEN_START = %lu/%lu/%lu %lu%% density\n",
  528. i, count[0], count[1], count[2],
  529. ((count[1] + count[2]) * BITMAP_GRANULARITY) * 100
  530. / getpagesize());
  531. tot += (count[1] + count[2]) * BITMAP_GRANULARITY;
  532. }
  533. /* This is optimistic, since we overalloc in several cases. */
  534. fprintf(out, "Best possible allocation density = %lu%%\n",
  535. tot * 100 / poolsize);
  536. }