| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- /* Simple speed tests for a hash of strings. */
- #include <ccan/htable/htable_type.h>
- #include <ccan/htable/htable.c>
- #include <ccan/str_talloc/str_talloc.h>
- #include <ccan/grab_file/grab_file.h>
- #include <ccan/talloc/talloc.h>
- #include <ccan/hash/hash.h>
- #include <ccan/time/time.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #include <unistd.h>
- #include <sys/time.h>
/* Number of hash_str() calls so far.  main() resets it to 0 before some
 * phases; nothing in the visible code reads it back. */
static size_t hashcount;
/* Key accessor for the string table: a stored string is its own key. */
static const char *strkey(const char *str)
{
	const char *key = str;

	return key;
}
- static size_t hash_str(const char *key)
- {
- hashcount++;
- return hash(key, strlen(key), 0);
- }
/* Equality callback for the table: true when the stored string and the
 * looked-up key have identical contents. */
static bool cmp(const char *obj, const char *key)
{
	if (strcmp(obj, key) != 0)
		return false;
	return true;
}
/* Instantiate the typed table "str" over char strings, wired to strkey,
 * hash_str and cmp above.  main() relies on the resulting struct htable_str
 * and the htable_str_new/add/get/del functions. */
HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, str);
/*
 * Nanoseconds per operation.
 *
 * @start: time the measured phase began.
 * @stop:  time the measured phase ended.
 * @num:   number of operations performed in between.
 *
 * Returns the elapsed time divided by @num, in nanoseconds, truncated to a
 * whole number.  Returns 0 when @num is 0 or when @stop is not later than
 * @start, since neither case yields a meaningful per-operation figure (and
 * converting an infinite or negative double to size_t would be undefined).
 */
static size_t normalize(const struct timeval *start,
			const struct timeval *stop,
			unsigned int num)
{
	double usec;

	if (num == 0)
		return 0;

	/* Do the whole computation in floating point: it is more accurate
	 * here and cannot overflow a narrow time_t when scaling seconds. */
	usec = (double)(stop->tv_sec - start->tv_sec) * 1000000.0
		+ (double)(stop->tv_usec - start->tv_usec);
	if (usec <= 0)
		return 0;

	return (size_t)(usec / num * 1000);
}
- int main(int argc, char *argv[])
- {
- size_t i, j, num;
- struct timeval start, stop;
- struct htable_str *ht;
- char **words, **misswords;
- words = strsplit(NULL, grab_file(NULL,
- argv[1] ? argv[1] : "/usr/share/dict/words",
- NULL), "\n");
- ht = htable_str_new();
- num = talloc_array_length(words) - 1;
- printf("%zu words\n", num);
- /* Append and prepend last char for miss testing. */
- misswords = talloc_array(words, char *, num);
- for (i = 0; i < num; i++) {
- char lastc;
- if (strlen(words[i]))
- lastc = words[i][strlen(words[i])-1];
- else
- lastc = 'z';
- misswords[i] = talloc_asprintf(misswords, "%c%s%c%c",
- lastc, words[i], lastc, lastc);
- }
- printf("#01: Initial insert: ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++)
- htable_str_add(ht, words[i]);
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("Bytes allocated: %zu\n",
- sizeof(((struct htable *)ht)->table[0])
- << ((struct htable *)ht)->bits);
- printf("#02: Initial lookup (match): ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++)
- if (htable_str_get(ht, words[i]) != words[i])
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#03: Initial lookup (miss): ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++) {
- if (htable_str_get(ht, misswords[i]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- /* Lookups in order are very cache-friendly for judy; try random */
- printf("#04: Initial lookup (random): ");
- fflush(stdout);
- start = time_now();
- for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
- if (htable_str_get(ht, words[j]) != words[j])
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- hashcount = 0;
- printf("#05: Initial delete all: ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++)
- if (!htable_str_del(ht, words[i]))
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#06: Initial re-inserting: ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++)
- htable_str_add(ht, words[i]);
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- hashcount = 0;
- printf("#07: Deleting first half: ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i+=2)
- if (!htable_str_del(ht, words[i]))
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#08: Adding (a different) half: ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i+=2)
- htable_str_add(ht, misswords[i]);
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#09: Lookup after half-change (match): ");
- fflush(stdout);
- start = time_now();
- for (i = 1; i < num; i+=2)
- if (htable_str_get(ht, words[i]) != words[i])
- abort();
- for (i = 0; i < num; i+=2) {
- if (htable_str_get(ht, misswords[i]) != misswords[i])
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#10: Lookup after half-change (miss): ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i+=2)
- if (htable_str_get(ht, words[i]))
- abort();
- for (i = 1; i < num; i+=2) {
- if (htable_str_get(ht, misswords[i]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- /* Hashtables with delete markers can fill with markers over time.
- * so do some changes to see how it operates in long-term. */
- printf("#11: Churn 1: ");
- start = time_now();
- for (j = 0; j < num; j+=2) {
- if (!htable_str_del(ht, misswords[j]))
- abort();
- if (!htable_str_add(ht, words[j]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#12: Churn 2: ");
- start = time_now();
- for (j = 1; j < num; j+=2) {
- if (!htable_str_del(ht, words[j]))
- abort();
- if (!htable_str_add(ht, misswords[j]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#13: Churn 3: ");
- start = time_now();
- for (j = 1; j < num; j+=2) {
- if (!htable_str_del(ht, misswords[j]))
- abort();
- if (!htable_str_add(ht, words[j]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- /* Now it's back to normal... */
- printf("#14: Post-Churn lookup (match): ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++)
- if (htable_str_get(ht, words[i]) != words[i])
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- printf("#15: Post-Churn lookup (miss): ");
- fflush(stdout);
- start = time_now();
- for (i = 0; i < num; i++) {
- if (htable_str_get(ht, misswords[i]))
- abort();
- }
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- /* Lookups in order are very cache-friendly for judy; try random */
- printf("#16: Post-Churn lookup (random): ");
- fflush(stdout);
- start = time_now();
- for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
- if (htable_str_get(ht, words[j]) != words[j])
- abort();
- stop = time_now();
- printf(" %zu ns\n", normalize(&start, &stop, num));
- return 0;
- }
|