bdelta.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838
  1. /*
  2. * Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. * THE SOFTWARE.
  21. */
  22. #include "bdelta.h"
  23. #include <assert.h>
  24. #include <limits.h>
  25. #include <stdint.h>
  26. #include <stdio.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. typedef struct
  30. {
  31. unsigned char *cur; /* End of string; insertion point for new bytes */
  32. unsigned char *end; /* End of buffer */
  33. unsigned char *start; /* Beginning of string */
  34. } SB;
  35. /* sb is evaluated multiple times in these macros. */
  36. #define sb_size(sb) ((size_t)((sb)->cur - (sb)->start))
  37. #define sb_avail(sb) ((size_t)((sb)->end - (sb)->cur))
  38. /* sb and need may be evaluated multiple times. */
  39. #define sb_need(sb, need) do { \
  40. if (sb_avail(sb) < (need)) \
  41. if (sb_grow(sb, need) != 0) \
  42. goto out_of_memory; \
  43. } while (0)
  44. static int sb_init(SB *sb)
  45. {
  46. sb->start = malloc(17);
  47. if (sb->start == NULL)
  48. return -1;
  49. sb->cur = sb->start;
  50. sb->end = sb->start + 16;
  51. return 0;
  52. }
  53. static int sb_grow(SB *sb, size_t need)
  54. {
  55. size_t length = sb->cur - sb->start;
  56. size_t alloc = sb->end - sb->start;
  57. unsigned char *tmp;
  58. do {
  59. alloc *= 2;
  60. } while (alloc < length + need);
  61. tmp = realloc(sb->start, alloc + 1);
  62. if (tmp == NULL)
  63. return -1;
  64. sb->start = tmp;
  65. sb->cur = tmp + length;
  66. sb->end = tmp + alloc;
  67. return 0;
  68. }
  69. static int sb_putc(SB *sb, unsigned char c)
  70. {
  71. sb_need(sb, 1);
  72. *sb->cur++ = c;
  73. return 0;
  74. out_of_memory:
  75. return -1;
  76. }
  77. static int sb_write(SB *sb, const void *data, size_t size)
  78. {
  79. sb_need(sb, size);
  80. memcpy(sb->cur, data, size);
  81. sb->cur += size;
  82. return 0;
  83. out_of_memory:
  84. return -1;
  85. }
  86. static void sb_return(SB *sb, void **data_out, size_t *length_out)
  87. {
  88. *sb->cur = 0;
  89. if (data_out)
  90. *data_out = sb->start;
  91. else
  92. free(sb->start);
  93. if (length_out)
  94. *length_out = sb->cur - sb->start;
  95. }
  96. static void sb_discard(SB *sb, void **data_out, size_t *length_out)
  97. {
  98. free(sb->start);
  99. if (data_out)
  100. *data_out = NULL;
  101. if (length_out)
  102. *length_out = 0;
  103. }
  104. /*
  105. * The first byte in a patch is the "patch type", which indicates how the
  106. * patch is formatted. This keeps the patch format flexible while retaining
  107. * backward compatibility. Patches produced with an older version of
  108. * the library can be applied with a newer version.
  109. *
  110. * PT_LITERAL
  111. * Contains nothing more than the content of the new text.
  112. *
  113. * PT_CSI32
  114. * A string of copy, skip, and insert instructions for generating the new
  115. * string from the old.
  116. *
  117. * copy(size): Copy @size bytes from old to new.
  118. * skip(size): Skip @size bytes of old.
  119. * insert(text): Insert @text into new.
  120. *
  121. * The syntax is as follows:
  122. *
  123. * copy: instruction_byte(1) size
  124. * skip: instruction_byte(2) size
  125. * insert: instruction_byte(3) size $size*OCTET
  126. *
  127. * 0 <= size_param_length <= 4
  128. * instruction_byte(op) = op | size_param_length << 2
  129. * size: $size_param_length*OCTET
  130. * -- size is an unsigned integer encoded in big endian.
  131. * -- However, if size_param_length is 0, the operation size is 1.
  132. *
  133. * Simply put, an instruction starts with an opcode and operation size.
  134. * An insert instruction is followed by the bytes to be inserted.
  135. */
  136. #define PT_LITERAL 10
  137. #define PT_CSI32 11
  138. #define OP_COPY 1
  139. #define OP_SKIP 2
  140. #define OP_INSERT 3
  141. static unsigned int bytes_needed_for_size(uint32_t size)
  142. {
  143. if (size == 1)
  144. return 0;
  145. else if (size <= 0xFF)
  146. return 1;
  147. else if (size <= 0xFFFF)
  148. return 2;
  149. else if (size <= 0xFFFFFF)
  150. return 3;
  151. else
  152. return 4;
  153. }
  154. /*
  155. * Return values:
  156. *
  157. * BDELTA_OK: Success
  158. * BDELTA_MEMORY: Memory allocation failed
  159. */
  160. static BDELTAcode csi32_emit_op(SB *patch_out, int op, uint32_t size, const char **new_)
  161. {
  162. unsigned int i;
  163. unsigned int size_param_length;
  164. size_t need;
  165. uint32_t tmp;
  166. assert(op >= 1 && op <= 3);
  167. if (size == 0)
  168. return BDELTA_OK;
  169. size_param_length = bytes_needed_for_size(size);
  170. need = 1 + size_param_length;
  171. if (op == OP_INSERT)
  172. need += size;
  173. sb_need(patch_out, need);
  174. *patch_out->cur++ = (unsigned int)op | size_param_length << 2;
  175. for (i = size_param_length, tmp = size; i-- > 0; tmp >>= 8)
  176. patch_out->cur[i] = tmp & 0xFF;
  177. patch_out->cur += size_param_length;
  178. switch (op) {
  179. case OP_COPY:
  180. *new_ += size;
  181. break;
  182. case OP_SKIP:
  183. break;
  184. case OP_INSERT:
  185. memcpy(patch_out->cur, *new_, size);
  186. patch_out->cur += size;
  187. *new_ += size;
  188. break;
  189. default:
  190. assert(0);
  191. }
  192. return BDELTA_OK;
  193. out_of_memory:
  194. return BDELTA_MEMORY;
  195. }
  196. /*
  197. * On success, returns 1, advances *sp past the parsed text, and sets *op_out and *size_out.
  198. * On error or EOF, returns 0.
  199. */
  200. static int csi32_parse_op(
  201. const unsigned char **sp, const unsigned char *e,
  202. int *op_out, uint32_t *size_out)
  203. {
  204. const unsigned char *s = *sp;
  205. int op;
  206. unsigned int i;
  207. unsigned int size_param_length;
  208. uint32_t size;
  209. if (s >= e)
  210. return 0;
  211. op = *s & 3;
  212. size_param_length = *s >> 2;
  213. s++;
  214. if (op == 0 || size_param_length > 4)
  215. return 0;
  216. if (size_param_length == 0) {
  217. size = 1;
  218. } else {
  219. if ((size_t)(e - s) < size_param_length)
  220. return 0;
  221. size = 0;
  222. for (i = 0; i < size_param_length; i++) {
  223. size <<= 8;
  224. size |= *s++ & 0xFF;
  225. }
  226. }
  227. /* Make sure insert data fits in the patch, but don't consume it. */
  228. if (op == OP_INSERT && (size_t)(e - s) < size)
  229. return 0;
  230. *op_out = op;
  231. *size_out = size;
  232. *sp = s;
  233. return 1;
  234. }
  235. /*
  236. * bdelta uses the algorithm described in:
  237. *
  238. * Myers, E. (1986). An O(ND) Difference Algorithm and Its Variations.
  239. * Retrieved from http://www.xmailserver.org/diff2.pdf
  240. *
  241. * The pseudocode in Myers' paper (Figure 2) uses an array called V,
  242. * where (V[k], V[k] - k) is the endpoint of the furthest-reaching
  243. * D-path ending on diagonal k.
  244. *
  245. * The structure below holds the V array for every iteration of the outer loop.
  246. * Because each iteration produces D+1 values, a triangle is formed:
  247. *
  248. * k
  249. * -5 -4 -3 -2 -1 0 1 2 3 4 5
  250. * ----------------------------------
  251. * 0 | 0 (copy 0)
  252. * | \ skip 1
  253. * 1 | 0 1
  254. * | \ skip 1, then copy 1
  255. * 2 | 2 2 3
  256. * D | / insert 1, then copy 2
  257. * 3 | 3 4 5 5
  258. * | \ skip 1, then copy 1
  259. * 4 | 3 4 5 7 7
  260. * | / insert 1
  261. * 5 | 3 4 5 7 - -
  262. *
  263. * @data will literally contain: 0 0 1 2 2 3 3 4 5 5 3 4 5 7 7 3 4 5 7
  264. *
  265. * To convert this to an edit script, we first climb back to the top,
  266. * using the same procedure as was used when the triangle was generated:
  267. *
  268. * If k = -D, climb right (the only way we can go).
  269. * If k = +D, climb left (the only way we can go).
  270. * Otherwise, favor the greater number.
  271. * If the numbers are the same, climb left.
  272. *
  273. * Finally, we convert the descent to the solution to a patch script:
  274. *
  275. * The top number n corresponds to:
  276. * copy n
  277. *
  278. * A descent left from a to b corresponds to:
  279. * insert 1
  280. * copy b-a
  281. *
  282. * A descent right from a to b corresponds to:
  283. * skip 1
  284. * copy b-a-1
  285. */
  286. typedef struct
  287. {
  288. uint32_t *data;
  289. int solution_d;
  290. int solution_k;
  291. uint32_t *solution_ptr;
  292. } Triangle;
  293. /*
  294. * Return values:
  295. *
  296. * BDELTA_OK: Success
  297. * BDELTA_MEMORY: Memory allocation failed
  298. * BDELTA_INTERNAL_DMAX_EXCEEDED: d_max exceeded (strings are too different)
  299. */
  300. static BDELTAcode build_triangle(
  301. const char *old, uint32_t old_size,
  302. const char *new_, uint32_t new_size,
  303. int d_max,
  304. Triangle *triangle_out)
  305. {
  306. int d, k;
  307. uint32_t x, y;
  308. uint32_t *data;
  309. uint32_t *vprev; /* position within previous row */
  310. uint32_t *v; /* position within current row */
  311. uint32_t *vcur; /* beginning of current row */
  312. size_t data_alloc = 16;
  313. memset(triangle_out, 0, sizeof(*triangle_out));
  314. data = malloc(data_alloc * sizeof(*data));
  315. if (data == NULL)
  316. return BDELTA_MEMORY;
  317. /* Allow dmax < 0 to mean "no limit". */
  318. if (d_max < 0)
  319. d_max = old_size + new_size;
  320. /*
  321. * Compute the farthest-reaching 0-path so the loop after this
  322. * will have a "previous" row to start with.
  323. */
  324. for (x = 0; x < old_size && x < new_size && old[x] == new_[x]; )
  325. x++;
  326. *data = x;
  327. if (x >= old_size && x >= new_size) {
  328. /* Strings are equal, so return a triangle with one row (a dot). */
  329. assert(x == old_size && x == new_size);
  330. triangle_out->data = data;
  331. triangle_out->solution_d = 0;
  332. triangle_out->solution_k = 0;
  333. triangle_out->solution_ptr = data;
  334. return BDELTA_OK;
  335. }
  336. vprev = data;
  337. vcur = v = data + 1;
  338. /*
  339. * Here is the core of the Myers diff algorithm.
  340. *
  341. * This is a direct translation of the pseudocode in Myers' paper,
  342. * with implementation-specific adaptations:
  343. *
  344. * * Every V array is preserved per iteration of the outer loop.
  345. * This is necessary so we can determine the actual patch, not just
  346. * the length of the shortest edit string. See the coment above
  347. * the definition of Triangle for an in-depth explanation.
  348. *
  349. * * Array items are stored consecutively so as to not waste space.
  350. *
  351. * * The buffer holding the V arrays is expanded dynamically.
  352. */
  353. for (d = 1; d <= d_max; d++, vprev = vcur, vcur = v) {
  354. /* Ensure that the buffer has enough room for this row. */
  355. if ((size_t)(v - data + d + 1) > data_alloc) {
  356. size_t vprev_idx = vprev - data;
  357. size_t v_idx = v - data;
  358. size_t vcur_idx = vcur - data;
  359. uint32_t *tmp;
  360. do {
  361. data_alloc *= 2;
  362. } while ((size_t)(v - data + d + 1) > data_alloc);
  363. tmp = realloc(data, data_alloc * sizeof(*data));
  364. if (tmp == NULL) {
  365. free(data);
  366. return BDELTA_MEMORY;
  367. }
  368. data = tmp;
  369. /* Relocate pointers to the buffer we just expanded. */
  370. vprev = data + vprev_idx;
  371. v = data + v_idx;
  372. vcur = data + vcur_idx;
  373. }
  374. for (k = -d; k <= d; k += 2, vprev++) {
  375. if (k == -d || (k != d && vprev[-1] < vprev[0]))
  376. x = vprev[0];
  377. else
  378. x = vprev[-1] + 1;
  379. y = x - k;
  380. while (x < old_size && y < new_size && old[x] == new_[y])
  381. x++, y++;
  382. *v++ = x;
  383. if (x >= old_size && y >= new_size) {
  384. /* Shortest edit string found. */
  385. assert(x == old_size && y == new_size);
  386. triangle_out->data = data;
  387. triangle_out->solution_d = d;
  388. triangle_out->solution_k = k;
  389. triangle_out->solution_ptr = v - 1;
  390. return BDELTA_OK;
  391. }
  392. }
  393. }
  394. free(data);
  395. return BDELTA_INTERNAL_DMAX_EXCEEDED;
  396. }
  397. /*
  398. * Trace a solution back to the top, returning a string of instructions
  399. * for descending from the top to the solution.
  400. *
  401. * An instruction is one of the following:
  402. *
  403. * -1: Descend left.
  404. * +1: Descend right.
  405. * 0: Finished. You should be at the solution now.
  406. *
  407. * If memory allocation fails, this function will return NULL.
  408. */
  409. static signed char *climb_triangle(const Triangle *triangle)
  410. {
  411. signed char *descent;
  412. int d, k;
  413. uint32_t *p;
  414. assert(triangle->solution_d >= 0);
  415. descent = malloc(triangle->solution_d + 1);
  416. if (descent == NULL)
  417. return NULL;
  418. d = triangle->solution_d;
  419. k = triangle->solution_k;
  420. p = triangle->solution_ptr;
  421. descent[d] = 0;
  422. while (d > 0) {
  423. if (k == -d || (k != d && *(p-d-1) < *(p-d))) {
  424. /* Climb right */
  425. k++;
  426. p = p - d;
  427. descent[--d] = -1;
  428. } else {
  429. /* Climb left */
  430. k--;
  431. p = p - d - 1;
  432. descent[--d] = 1;
  433. }
  434. }
  435. return descent;
  436. }
  437. /*
  438. * Generate the actual patch, given data produced by build_triangle and
  439. * climb_triangle. new_ is needed for the content of the insertions.
  440. *
  441. * See the comment above the definition of Triangle. It concisely documents
  442. * how a descent down the triangle corresponds to a patch script.
  443. *
  444. * The resulting patch, including the patch type byte, is appended to patch_out.
  445. *
  446. * Return values:
  447. *
  448. * BDELTA_OK: Success
  449. * BDELTA_MEMORY: Memory allocation failed
  450. */
  451. static BDELTAcode descent_to_patch(
  452. const signed char *descent,
  453. const Triangle *triangle,
  454. const char *new_, uint32_t new_size,
  455. SB *patch_out)
  456. {
  457. const char *new_end = new_ + new_size;
  458. uint32_t *p = triangle->data;
  459. uint32_t *p2;
  460. int d = 0;
  461. int k = 0;
  462. int pending_op = 0;
  463. int current_op;
  464. uint32_t pending_length = 0;
  465. uint32_t copy_length;
  466. if (sb_putc(patch_out, PT_CSI32) != 0)
  467. return BDELTA_MEMORY;
  468. if (*p > 0) {
  469. if (csi32_emit_op(patch_out, OP_COPY, *p, &new_) != BDELTA_OK)
  470. return BDELTA_MEMORY;
  471. }
  472. for (; *descent != 0; descent++, p = p2) {
  473. if (*descent < 0) {
  474. /* Descend left. */
  475. d++;
  476. k--;
  477. p2 = p + d;
  478. current_op = OP_INSERT;
  479. assert(*p2 >= *p);
  480. copy_length = *p2 - *p;
  481. } else {
  482. /* Descend right. */
  483. d++;
  484. k++;
  485. p2 = p + d + 1;
  486. current_op = OP_SKIP;
  487. assert(*p2 > *p);
  488. copy_length = *p2 - *p - 1;
  489. }
  490. if (pending_op == current_op) {
  491. pending_length++;
  492. } else {
  493. if (pending_op != 0) {
  494. if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
  495. return BDELTA_MEMORY;
  496. }
  497. pending_op = current_op;
  498. pending_length = 1;
  499. }
  500. if (copy_length > 0) {
  501. if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
  502. return BDELTA_MEMORY;
  503. pending_op = 0;
  504. if (csi32_emit_op(patch_out, OP_COPY, copy_length, &new_) != BDELTA_OK)
  505. return BDELTA_MEMORY;
  506. }
  507. }
  508. assert(d == triangle->solution_d);
  509. assert(k == triangle->solution_k);
  510. assert(p == triangle->solution_ptr);
  511. /* Emit the last pending op, unless it's a skip. */
  512. if (pending_op != 0 && pending_op != OP_SKIP) {
  513. if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
  514. return BDELTA_MEMORY;
  515. }
  516. assert(new_ == new_end);
  517. return BDELTA_OK;
  518. }
  519. /*
  520. * Generate a patch using Myers' O(ND) algorithm.
  521. *
  522. * The patch is appended to @patch_out, which must be initialized before calling.
  523. *
  524. * Return values:
  525. *
  526. * BDELTA_OK: Success
  527. * BDELTA_MEMORY: Memory allocation failed
  528. * BDELTA_INTERNAL_INPUTS_TOO_LARGE: Input sizes are too large
  529. * BDELTA_INTERNAL_DMAX_EXCEEDED: d_max exceeded (strings are too different)
  530. */
  531. static BDELTAcode diff_myers(
  532. const char *old, size_t old_size,
  533. const char *new_, size_t new_size,
  534. SB *patch_out)
  535. {
  536. Triangle triangle;
  537. signed char *descent;
  538. BDELTAcode rc;
  539. /* Make sure old_size + new_size does not overflow int or uint32_t. */
  540. if (old_size >= UINT32_MAX ||
  541. new_size >= UINT32_MAX - old_size ||
  542. old_size >= (unsigned int)INT_MAX ||
  543. new_size >= (unsigned int)INT_MAX - old_size)
  544. return BDELTA_INTERNAL_INPUTS_TOO_LARGE;
  545. rc = build_triangle(old, old_size, new_, new_size, 1000, &triangle);
  546. if (rc != BDELTA_OK)
  547. return rc;
  548. descent = climb_triangle(&triangle);
  549. if (descent == NULL)
  550. goto oom1;
  551. if (descent_to_patch(descent, &triangle, new_, new_size, patch_out) != BDELTA_OK)
  552. goto oom2;
  553. free(descent);
  554. free(triangle.data);
  555. return BDELTA_OK;
  556. oom2:
  557. free(descent);
  558. oom1:
  559. free(triangle.data);
  560. return BDELTA_MEMORY;
  561. }
  562. BDELTAcode bdelta_diff(
  563. const void *old, size_t old_size,
  564. const void *new_, size_t new_size,
  565. void **patch_out, size_t *patch_size_out)
  566. {
  567. SB patch;
  568. if (sb_init(&patch) != 0)
  569. goto out_of_memory;
  570. if (new_size == 0)
  571. goto emit_new_literally;
  572. if (diff_myers(old, old_size, new_, new_size, &patch) != BDELTA_OK)
  573. goto emit_new_literally;
  574. if (sb_size(&patch) > new_size) {
  575. /*
  576. * A literal copy of new is no longer than this patch.
  577. * All that for nothing.
  578. */
  579. goto emit_new_literally;
  580. }
  581. /*
  582. * Verify that patch, when applied to old, produces the correct text.
  583. * If it doesn't, it's a bug, but fall back to a simple emit
  584. * to avert data corruption.
  585. */
  586. {
  587. void *result;
  588. size_t result_size;
  589. BDELTAcode rc;
  590. int correct;
  591. rc = bdelta_patch(
  592. old, old_size,
  593. patch.start, patch.cur - patch.start,
  594. &result, &result_size
  595. );
  596. switch (rc) {
  597. case BDELTA_OK:
  598. correct = (result_size == new_size &&
  599. memcmp(result, new_, new_size) == 0);
  600. free(result);
  601. break;
  602. case BDELTA_MEMORY:
  603. goto out_of_memory;
  604. default:
  605. correct = 0;
  606. break;
  607. }
  608. if (!correct) {
  609. assert(0);
  610. goto emit_new_literally;
  611. }
  612. }
  613. sb_return(&patch, patch_out, patch_size_out);
  614. return BDELTA_OK;
  615. emit_new_literally:
  616. if (patch.cur != patch.start) {
  617. free(patch.start);
  618. if (sb_init(&patch) != 0)
  619. goto out_of_memory;
  620. }
  621. if (sb_putc(&patch, PT_LITERAL) != 0 || sb_write(&patch, new_, new_size) != 0)
  622. goto out_of_memory;
  623. sb_return(&patch, patch_out, patch_size_out);
  624. return BDELTA_OK;
  625. out_of_memory:
  626. sb_discard(&patch, patch_out, patch_size_out);
  627. return BDELTA_MEMORY;
  628. }
  629. /*
  630. * Return values:
  631. *
  632. * BDELTA_OK: Success
  633. * BDELTA_PATCH_INVALID: Patch is malformed
  634. * BDELTA_PATCH_MISMATCH: Old string is too small
  635. * BDELTA_MEMORY: Memory allocation failed
  636. */
  637. static BDELTAcode patch_csi32(
  638. const unsigned char *o, const unsigned char *oe,
  639. const unsigned char *p, const unsigned char *pe,
  640. SB *new_out)
  641. {
  642. int op;
  643. uint32_t size;
  644. while (csi32_parse_op(&p, pe, &op, &size)) {
  645. if ((op == OP_COPY || op == OP_SKIP) && (size_t)(oe - o) < size) {
  646. /* Copy or skip instruction exceeds length of old string. */
  647. return BDELTA_PATCH_MISMATCH;
  648. }
  649. if (op == OP_COPY || op == OP_INSERT)
  650. sb_need(new_out, size);
  651. switch (op) {
  652. case OP_COPY: /* Copy @size bytes from old string. */
  653. memcpy(new_out->cur, o, size);
  654. new_out->cur += size;
  655. o += size;
  656. break;
  657. case OP_SKIP: /* Skip @size bytes of old string. */
  658. o += size;
  659. break;
  660. case OP_INSERT: /* Insert @size new bytes (from the patch script). */
  661. memcpy(new_out->cur, p, size);
  662. new_out->cur += size;
  663. p += size;
  664. break;
  665. default:
  666. assert(0);
  667. }
  668. }
  669. if (p != pe)
  670. return BDELTA_PATCH_INVALID;
  671. return BDELTA_OK;
  672. out_of_memory:
  673. return BDELTA_MEMORY;
  674. }
  675. BDELTAcode bdelta_patch(
  676. const void *old, size_t old_size,
  677. const void *patch, size_t patch_size,
  678. void **new_out, size_t *new_size_out)
  679. {
  680. const unsigned char *o = old;
  681. const unsigned char *oe = o + old_size;
  682. const unsigned char *p = patch;
  683. const unsigned char *pe = p + patch_size;
  684. SB result;
  685. BDELTAcode rc;
  686. if (sb_init(&result) != 0) {
  687. rc = BDELTA_MEMORY;
  688. goto discard;
  689. }
  690. if (p >= pe) {
  691. rc = BDELTA_PATCH_INVALID;
  692. goto discard;
  693. }
  694. switch (*p++) {
  695. case PT_LITERAL:
  696. if (sb_write(&result, p, pe - p) != 0) {
  697. rc = BDELTA_MEMORY;
  698. goto discard;
  699. }
  700. break;
  701. case PT_CSI32:
  702. rc = patch_csi32(o, oe, p, pe, &result);
  703. if (rc != BDELTA_OK)
  704. goto discard;
  705. break;
  706. default:
  707. rc = BDELTA_PATCH_INVALID;
  708. goto discard;
  709. }
  710. sb_return(&result, new_out, new_size_out);
  711. return BDELTA_OK;
  712. discard:
  713. sb_discard(&result, new_out, new_size_out);
  714. return rc;
  715. }
  716. const char *bdelta_strerror(BDELTAcode code)
  717. {
  718. switch (code) {
  719. case BDELTA_OK:
  720. return "Success";
  721. case BDELTA_MEMORY:
  722. return "Could not allocate memory";
  723. case BDELTA_PATCH_INVALID:
  724. return "Patch is invalid";
  725. case BDELTA_PATCH_MISMATCH:
  726. return "Patch applied to wrong data";
  727. case BDELTA_INTERNAL_DMAX_EXCEEDED:
  728. return "Difference threshold exceeded (internal error)";
  729. case BDELTA_INTERNAL_INPUTS_TOO_LARGE:
  730. return "Inputs are too large (internal error)";
  731. default:
  732. return "Invalid error code";
  733. }
  734. }
  735. void bdelta_perror(const char *s, BDELTAcode code)
  736. {
  737. if (s != NULL && *s != '\0')
  738. fprintf(stderr, "%s: %s\n", s, bdelta_strerror(code));
  739. else
  740. fprintf(stderr, "%s\n", bdelta_strerror(code));
  741. }