From c6e7560d876558b2f3747aa9d6223e6cedfb97b4 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Tue, 22 Sep 2015 16:18:57 +0100 Subject: [PATCH] Add bit set data structure --- .gitignore | 1 + kbitset.h | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test/Makefile | 5 +- test/kbitset_test.c | 94 ++++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 kbitset.h create mode 100644 test/kbitset_test.c diff --git a/.gitignore b/.gitignore index 010a8eb..2630cb4 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ contents.xcworkspacedata ._* # Test byproducts +test/kbitset_test test/kbtree_test test/khash_keith test/khash_keith2 diff --git a/kbitset.h b/kbitset.h new file mode 100644 index 0000000..9e01abd --- /dev/null +++ b/kbitset.h @@ -0,0 +1,160 @@ +/* The MIT License + + Copyright (C) 2015 Genome Research Ltd. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
/* (End of the MIT License text that opens on the preceding patch line.) */

#ifndef KBITSET_H
#define KBITSET_H

/* Example of using kbitset_t, which represents a subset of {0,..., N-1},
   where N is the size specified in kbs_init().

    kbitset_t *bset = kbs_init(100);
    kbs_insert(bset, 5);
    kbs_insert(bset, 68);
    kbs_delete(bset, 37);
    // ...

    if (kbs_exists(bset, 68)) printf("68 present\n");

    kbitset_iter_t itr;
    int i;
    kbs_start(&itr);
    while ((i = kbs_next(bset, &itr)) >= 0)
        printf("%d present\n", i);

    kbs_destroy(bset);

   Example of declaring a kbitset_t-using function in a header file, so that
   only source files that actually use process() need to include <kbitset.h>:

    struct kbitset_t;
    void process(struct kbitset_t *bset);
*/

#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Number of bits stored in each element of kbitset_t.b[]. */
#define KBS_ELTBITS (CHAR_BIT * sizeof (unsigned long))
/* Index of the b[] element that holds bit i. */
#define KBS_ELT(i)  ((i) / KBS_ELTBITS)
/* Mask selecting bit i within its b[] element. */
#define KBS_MASK(i) (1UL << ((i) % KBS_ELTBITS))

/* A fixed-capacity set of small non-negative integers.

   b[0..n-1] hold the membership bits.  b[n] is an extra, always non-zero
   sentinel element: kbs_next() relies on it to stop its "skip empty
   elements" scan without a separate bounds test.  Bits of b[n-1] at
   positions >= ni are padding and are kept clear by kbs_init2(), kbs_clear()
   and kbs_insert_all(). */
typedef struct kbitset_t {
    size_t n;            /* number of data elements in b[] (sentinel excluded) */
    size_t ni;           /* capacity requested at initialisation: valid i are [0,ni) */
    unsigned long b[1];  /* really n+1 elements; allocated by kbs_init2() */
} kbitset_t;

// Reset the bit set to empty.
static inline void kbs_clear(kbitset_t *bs)
{
    memset(bs->b, 0, bs->n * sizeof (unsigned long));
}

// Reset the bit set to all of [0,ni).
static inline void kbs_insert_all(kbitset_t *bs)
{
    size_t tail = bs->ni % KBS_ELTBITS;

    memset(bs->b, ~0, bs->n * sizeof (unsigned long));

    // When ni is not a multiple of the element width, the last data element
    // has padding bits; clear them so that elements >= ni are not reported
    // as present.  (tail != 0 implies n >= 1.)
    if (tail != 0) bs->b[bs->n - 1] = (1UL << tail) - 1;
}

// Initialise a bit set capable of holding ni integers, 0 <= i < ni.
// The set returned is empty if fill == 0, or all of [0,ni) otherwise.
// Returns NULL if memory cannot be allocated; otherwise the caller owns the
// result and releases it with kbs_destroy().
static inline kbitset_t *kbs_init2(size_t ni, int fill)
{
    // Round up without computing ni + KBS_ELTBITS-1, which would wrap
    // around for ni close to SIZE_MAX and yield an undersized allocation.
    size_t n = ni / KBS_ELTBITS + (ni % KBS_ELTBITS != 0);

    // sizeof (kbitset_t) already accounts for one element of b[], so this
    // provides n data elements plus the sentinel.  The cast is redundant in
    // C (NOTE(review): presumably retained so the header also builds as C++).
    kbitset_t *bs =
        (kbitset_t *) malloc(sizeof (kbitset_t) + n * sizeof (unsigned long));
    if (bs == NULL) return NULL;

    bs->n = n;
    bs->ni = ni;
    bs->b[n] = ~0UL;  // sentinel: always non-zero (see kbs_next())

    if (fill) kbs_insert_all(bs);
    else kbs_clear(bs);
    return bs;
}

// Initialise an empty bit set capable of holding ni integers, 0 <= i < ni.
static inline kbitset_t *kbs_init(size_t ni)
{
    return kbs_init2(ni, 0);
}

// Destroy a bit set.  Passing NULL is harmless, as for free().
static inline void kbs_destroy(kbitset_t *bs)
{
    free(bs);
}

// Insert an element into the bit set.
// No range check is made: the caller must ensure 0 <= i < ni.
static inline void kbs_insert(kbitset_t *bs, int i)
{
    bs->b[KBS_ELT(i)] |= KBS_MASK(i);
}

// Remove an element from the bit set.
// No range check is made: the caller must ensure 0 <= i < ni.
static inline void kbs_delete(kbitset_t *bs, int i)
{
    bs->b[KBS_ELT(i)] &= ~KBS_MASK(i);
}

// Test whether the bit set contains the element.
// No range check is made: the caller must ensure 0 <= i < ni.
static inline int kbs_exists(const kbitset_t *bs, int i)
{
    return (bs->b[KBS_ELT(i)] & KBS_MASK(i)) != 0;
}

/* Iteration state for kbs_next(). */
typedef struct kbitset_iter_t {
    unsigned long mask;  /* bit to be examined next within b[elt]; 0 once
                            every bit of that element has been examined */
    size_t elt;          /* index of the b[] element being scanned */
    int i;               /* integer corresponding to (elt, mask) */
} kbitset_iter_t;

// Initialise or reset a bit set iterator.
static inline void kbs_start(kbitset_iter_t *itr)
{
    itr->mask = 1;
    itr->elt = 0;
    itr->i = 0;
}

// Return the next element contained in the bit set, or -1 if there are no more.
// Once -1 has been returned, further calls keep returning -1 until the
// iterator is reset with kbs_start().
static inline int kbs_next(const kbitset_t *bs, kbitset_iter_t *itr)
{
    unsigned long b;

    // Already exhausted, or the set has no data elements at all.  Without
    // this guard the code below would treat the sentinel as data (n == 0)
    // or step past it (repeated call after -1).
    if (itr->elt >= bs->n) return -1;

    b = bs->b[itr->elt];

    for (;;) {
        if (itr->mask == 0) {
            // Current element is used up: skip whole elements that are
            // empty.  The non-zero sentinel b[n] terminates this scan.
            while ((b = bs->b[++itr->elt]) == 0) itr->i += KBS_ELTBITS;
            if (itr->elt == bs->n) return -1;
            itr->mask = 1;
        }

        if (b & itr->mask) break;

        itr->i++;
        itr->mask <<= 1;
    }

    // Step past the bit being returned so the next call resumes after it.
    itr->mask <<= 1;
    return itr->i++;
}

#endif /* KBITSET_H */

/* Patch metadata that followed the header in the original diff, kept verbatim:

diff --git a/test/Makefile b/test/Makefile
index 44d9eab..1d42e59 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -4,7 +4,7 @@ CFLAGS=-g -Wall -O2 -I..
*/
CXXFLAGS=$(CFLAGS) PROGS=kbtree_test khash_keith khash_keith2 khash_test klist_test kseq_test kseq_bench \ kseq_bench2 ksort_test ksort_test-stl kvec_test kmin_test kstring_bench kstring_bench2 kstring_test \ - kthread_test2 + kthread_test2 kbitset_test all:$(PROGS) @@ -61,3 +61,6 @@ kthread_test:kthread_test.c ../kthread.c kthread_test2:kthread_test2.c ../kthread.c $(CC) $(CFLAGS) -o $@ kthread_test2.c ../kthread.c + +kbitset_test:kbitset_test.c ../kbitset.h + $(CC) $(CFLAGS) -o $@ kbitset_test.c diff --git a/test/kbitset_test.c b/test/kbitset_test.c new file mode 100644 index 0000000..f7e15bb --- /dev/null +++ b/test/kbitset_test.c @@ -0,0 +1,94 @@ +#include +#include +#include + +struct kbitset_t; +void check(struct kbitset_t *bset, const int present[], const char *title); + +#include "kbitset.h" + +int nfail = 0; + +void fail(const char *format, ...) +{ + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + nfail++; +} + +void check(kbitset_t *bset, const int present[], const char *title) +{ + kbitset_iter_t itr; + int i, j, n, nn; + + for (i = 0; present[i] >= 0; i++) kbs_insert(bset, present[i]); + nn = i; + + for (i = j = n = 0; i < 600; i++) + if (kbs_exists(bset, i)) { + n++; + if (i == present[j]) j++; + else fail("%s: %d should not be in the set\n", title, i); + } + else { + if (i == present[j]) { + fail("%s: %d should be in the set\n", title, i); + j++; + } + } + + if (n != nn) + fail("%s: expected %d elements; found %d\n", title, nn, n); + + j = n = 0; + kbs_start(&itr); + while ((i = kbs_next(bset, &itr)) >= 0) { + n++; + if (i == present[j]) j++; + else fail("%s: %d should not be returned by iterator\n", title, i); + } + + if (n != nn) + fail("%s: expected %d elements; iterator found %d\n", title, nn, n); +} + +// Element boundaries +#define B KBS_ELTBITS + +const int test1[] = { + 0, 1, 6, 10, 20, 22, 24, + B-1, B, 2*B-1, 3*B, + 4*B-2, 4*B-1, 4*B, 4*B+1, + 512, 513, + -1 +}; + +const int test2[] = { + 3*B+5, 
4*B-10, 500, 501, 502, 503, 504, 505, 506, 599, + -1 +}; + +int main(int argc, char **argv) +{ + kbitset_t *bset = kbs_init(600); + + check(bset, test1, "test1"); + kbs_delete(bset, 0); + kbs_delete(bset, 1); + kbs_delete(bset, 6); + check(bset, &test1[3], "test1a"); + + kbs_clear(bset); + check(bset, test2, "test2"); + + kbs_destroy(bset); + + if (nfail > 0) { + fprintf(stderr, "Total failures: %d\n", nfail); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +}