edit_distance_lcs.c 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. /** @file
  2. * Defines Longest Common Subsequence distance functions.
  3. *
  4. * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
  5. * MIT license - see LICENSE file for details
  6. */
  7. #include <stdlib.h> /* free, malloc */
  8. #include "edit_distance.h"
  9. #include "edit_distance-params.h"
  10. #include "edit_distance-private.h"
  11. ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen,
  12. const ed_elem *tgt, ed_size tlen)
  13. {
  14. /* Optimization: Avoid malloc when row of distance matrix can fit on
  15. * the stack.
  16. */
  17. ed_dist stackdist[ED_STACK_DIST_VALS];
  18. /* One row of the Wagner-Fischer distance matrix. */
  19. ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
  20. malloc((slen + 1) * sizeof(ed_dist));
  21. /* Initialize row with cost to delete src[0..i-1] */
  22. dist[0] = 0;
  23. for (ed_size i = 1; i <= slen; ++i) {
  24. dist[i] = dist[i - 1] + ED_DEL_COST(src[i - 1]);
  25. }
  26. for (ed_size j = 1; j <= tlen; ++j) {
  27. /* Value for dist[j-1][i-1] (one row up, one col left). */
  28. ed_dist diagdist = dist[0];
  29. dist[0] = dist[0] + ED_INS_COST(tgt[j - 1]);
  30. /* Loop invariant: dist[i] is the edit distance between first j
  31. * elements of tgt and first i elements of src.
  32. */
  33. for (ed_size i = 1; i <= slen; ++i) {
  34. ed_dist nextdiagdist = dist[i];
  35. if (ED_ELEM_EQUAL(src[i - 1], tgt[j - 1])) {
  36. /* Same as tgt upto j-2, src upto i-2. */
  37. dist[i] = diagdist;
  38. } else {
  39. /* Insertion is tgt upto j-2, src upto i-1
  40. * + insert tgt[j-1] */
  41. ed_dist insdist =
  42. dist[i] + ED_INS_COST(tgt[j - 1]);
  43. /* Deletion is tgt upto j-1, src upto i-2
  44. * + delete src[i-1] */
  45. ed_dist deldist =
  46. dist[i - 1] + ED_DEL_COST(src[i - 1]);
  47. /* Use best distance available */
  48. dist[i] = ED_MIN2(insdist, deldist);
  49. }
  50. diagdist = nextdiagdist;
  51. }
  52. }
  53. ed_dist total = dist[slen];
  54. if (dist != stackdist) {
  55. free(dist);
  56. }
  57. return total;
  58. }