diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index 3113a8151..54a2542c6 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -7,6 +7,7 @@ add_compile_options("$<$>:${PARSEC_ATOMIC_SUPPOR # Settings for targets # set(BASE_SOURCES + class/parsec_biased_rwlock.c class/parsec_dequeue.c class/parsec_fifo.c class/parsec_lifo.c diff --git a/parsec/class/parsec_biased_rwlock.c b/parsec/class/parsec_biased_rwlock.c new file mode 100644 index 000000000..82698a779 --- /dev/null +++ b/parsec/class/parsec_biased_rwlock.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2009-2022 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + */ + +#include "parsec/class/parsec_biased_rwlock.h" + +#include + +#include "parsec/runtime.h" +#include "parsec/constants.h" +#include "parsec/execution_stream.h" +#include "parsec/sys/atomic.h" +#include "parsec/class/parsec_rwlock.h" + +/** + * An implementation of the BRAVO biased reader/writer lock wrapper. + * The goal of the BRAVO lock wrapper is to avoid contending the atomic + * rwlock with reader locks, instead having threads mark their read status + * in an array. A writer will first take the rwlock, signal that a writer + * is active, and then wait for all readers to complete. New readers will + * see that a writer is active and wait for the reader lock to become available. + * + * This is clearly biased towards readers so this implementation is meant for + * cases where the majority of accesses are reads and only occasional writes occur. + * + * The paper presenting this technique is available at: + * https://arxiv.org/abs/1810.01553 + * + * While the original implementation uses a global hash table, we use a smaller table + * per lock. In PaRSEC, we know the number of threads we control up front. + * We simply pad for a cache line. If an unknown thread tries to take the lock against + * all odds, it falls back to taking the reader lock. 
+ */ + +struct parsec_biased_rwlock_t { + parsec_atomic_rwlock_t rw_lock; /**< underlying reader-writer lock */ + int32_t reader_bias; /**< whether locking is biased towards readers, will change if a writer occurs */ + uint32_t num_reader; /**< size of the reader_active field */ + uint8_t reader_active[]; /**< array with flags signalling reading threads */ +}; + +#define DEFAULT_CACHE_SIZE 64 + +int parsec_biased_rwlock_init(parsec_biased_rwlock_t **lock) { + parsec_biased_rwlock_t *res; + parsec_execution_stream_t *es = parsec_my_execution_stream(); + if (NULL == es) { + /* should be called from a parsec thread */ + res = (parsec_biased_rwlock_t *)malloc(sizeof(parsec_biased_rwlock_t)); + res->num_reader = 0; + res->reader_bias = 0; // disable reader biasing + } else { + uint32_t num_threads = es->virtual_process->nb_cores; + /* one cache line per reader */ + uint32_t num_reader = num_threads*DEFAULT_CACHE_SIZE; + res = (parsec_biased_rwlock_t *)malloc(sizeof(parsec_biased_rwlock_t) + num_reader*sizeof(uint8_t)); + res->reader_bias = 1; + res->num_reader = num_reader; + memset(res->reader_active, 0, num_reader); + } + parsec_atomic_rwlock_init(&res->rw_lock); + *lock = res; + + return PARSEC_SUCCESS; +} + +void parsec_biased_rwlock_rdlock(parsec_biased_rwlock_t *lock) +{ + parsec_execution_stream_t *es = parsec_my_execution_stream(); + if (PARSEC_UNLIKELY(NULL == es || lock->num_reader == 0)) { + /* fall back to the underlying rwlock */ + parsec_atomic_rwlock_rdlock(&lock->rw_lock); + return; + } + + if (PARSEC_UNLIKELY(!lock->reader_bias)) { + /* a writer is active, wait for the rwlock to become available */ + parsec_atomic_rwlock_rdlock(&lock->rw_lock); + return; + } + + /* fast-path: no writer, simply mark as active reader and make sure there is no race */ + size_t reader_entry = es->th_id*DEFAULT_CACHE_SIZE; + assert(reader_entry >= 0 && reader_entry < lock->num_reader); + assert(lock->reader_active[reader_entry] == 0); + + lock->reader_active[reader_entry] = 
1; + /* make sure the writer check is not moved to before setting the flag */ + parsec_atomic_rmb(); + /* double check that no writer came in between */ + if (PARSEC_UNLIKELY(!lock->reader_bias)) { + /* a writer has become active, fallback to the rwlock */ + lock->reader_active[reader_entry] = 0; + parsec_atomic_rwlock_rdlock(&lock->rw_lock); + } +} + +void parsec_biased_rwlock_rdunlock(parsec_biased_rwlock_t *lock) +{ + parsec_execution_stream_t *es = parsec_my_execution_stream(); + + if (PARSEC_UNLIKELY(NULL == es || lock->num_reader == 0)) { + /* fall back to the underlying rwlock */ + parsec_atomic_rwlock_rdunlock(&lock->rw_lock); + return; + } + + size_t reader_entry = es->th_id*DEFAULT_CACHE_SIZE; + assert(reader_entry >= 0 && reader_entry < lock->num_reader); + + if (PARSEC_UNLIKELY(lock->reader_active[reader_entry] == 0)) { + /* we had to take a lock, give it back */ + parsec_atomic_rwlock_rdunlock(&lock->rw_lock); + } else { + lock->reader_active[reader_entry] = 0; + } +} + +void parsec_biased_rwlock_wrlock(parsec_biased_rwlock_t *lock) +{ + /* acquire the writer lock first */ + parsec_atomic_rwlock_wrlock(&lock->rw_lock); + + lock->reader_bias = 0; + + /* make sure the reads below are not moved before the write */ + parsec_atomic_wmb(); + + /* wait for all current reader to complete */ + for (uint32_t i = 0; i < lock->num_reader; ++i) { + while (lock->reader_active[i] != 0) { + static struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 }; + nanosleep(&ts, NULL); + } + } +} + +void parsec_biased_rwlock_wrunlock(parsec_biased_rwlock_t *lock) +{ + assert(lock->reader_bias == 0); + if (lock->num_reader > 0) { + /* re-enable reader bias, if we support it */ + lock->reader_bias = 1; + } + parsec_atomic_rwlock_wrunlock(&lock->rw_lock); +} diff --git a/parsec/class/parsec_biased_rwlock.h b/parsec/class/parsec_biased_rwlock.h new file mode 100644 index 000000000..ab04b6cc0 --- /dev/null +++ b/parsec/class/parsec_biased_rwlock.h @@ -0,0 +1,44 @@ +/* + * Copyright 
(c) 2009-2022 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + */ +#ifndef _parsec_biased_rwlock_h +#define _parsec_biased_rwlock_h + +#include "parsec/parsec_config.h" + +/** + * An implementation of the BRAVO biased reader/writer lock wrapper. + * The goal of the BRAVO lock wrapper is to avoid contending the atomic + * rwlock with reader locks, instead having threads mark their read status + * in an array. A writer will first take the rwlock, signal that a writer + * is active, and then wait for all readers to complete. New readers will + * see that a writer is active and wait for the reader lock to become available. + * + * This is clearly biased towards readers so this implementation is meant for + * cases where the majority of accesses are reads and only occasional writes occur. + * + * The paper presenting this technique is available at: + * https://arxiv.org/abs/1810.01553 + * + * While the original implementation uses a global hash table, we use a smaller table + * per lock. In PaRSEC, we know the number of threads we control up front. + * We simply pad for a cache line. If an unknown thread tries to take the lock against + * all odds, it falls back to taking the reader lock. 
+ */ + +/* fwd-decl: the structure is opaque to users of this header, locks are only handled through pointers */ +typedef struct parsec_biased_rwlock_t parsec_biased_rwlock_t; + +/** Allocate (on the heap) and initialize a biased rwlock and store it in *lock. Reader biasing is only enabled when the calling thread has a PaRSEC execution stream. Returns PARSEC_SUCCESS on success. No matching release function is declared in this header. */ +int parsec_biased_rwlock_init(parsec_biased_rwlock_t **lock); + +/** Acquire the lock for reading. Falls back to the underlying rwlock when the calling thread has no execution stream, when the lock has no reader slots, or when a writer is active. */ +void parsec_biased_rwlock_rdlock(parsec_biased_rwlock_t *lock); + +/** Release a read lock. The calling thread's own reader slot is inspected to tell which path rdlock took, so this is expected to run on the thread that called rdlock. */ +void parsec_biased_rwlock_rdunlock(parsec_biased_rwlock_t *lock); + +/** Acquire the lock for writing: takes the underlying write lock, disables reader biasing, then waits until no reader slot is marked active. */ +void parsec_biased_rwlock_wrlock(parsec_biased_rwlock_t *lock); + +/** Release the write lock: re-enables reader biasing when the lock has reader slots, then releases the underlying write lock. */ +void parsec_biased_rwlock_wrunlock(parsec_biased_rwlock_t *lock); + +#endif // _parsec_biased_rwlock_h diff --git a/parsec/class/parsec_hash_table.c b/parsec/class/parsec_hash_table.c index 252943f36..c3e7a8405 100644 --- a/parsec/class/parsec_hash_table.c +++ b/parsec/class/parsec_hash_table.c @@ -89,7 +89,6 @@ int parsec_hash_tables_init(void) void parsec_hash_table_init(parsec_hash_table_t *ht, int64_t offset, int nb_bits, parsec_key_fn_t key_functions, void *data) { - parsec_atomic_rwlock_t unlock = { PARSEC_RWLOCK_UNLOCKED }; parsec_hash_table_head_t *head; size_t i; int v; @@ -120,7 +119,7 @@ void parsec_hash_table_init(parsec_hash_table_t *ht, int64_t offset, int nb_bits head->next = NULL; head->next_to_free = NULL; ht->rw_hash = head; - ht->rw_lock = unlock; + parsec_biased_rwlock_init(&ht->rw_lock); for( i = 0; i < (1ULL<buckets[i].lock); @@ -242,7 +241,7 @@ void parsec_hash_table_lock_bucket(parsec_hash_table_t *ht, parsec_key_t key ) { uint64_t hash; - parsec_atomic_rwlock_rdlock(&ht->rw_lock); + parsec_biased_rwlock_rdlock(ht->rw_lock); hash = parsec_hash_table_universal_rehash(ht->key_functions.key_hash(key, ht->hash_data), ht->rw_hash->nb_bits); assert( hash < (1ULL<rw_hash->nb_bits) ); parsec_atomic_lock(&ht->rw_hash->buckets[hash].lock); @@ -290,17 +289,17 @@ void parsec_hash_table_unlock_bucket_impl(parsec_hash_table_t *ht, parsec_key_t } cur_head = ht->rw_hash; parsec_atomic_unlock(&ht->rw_hash->buckets[hash].lock); - parsec_atomic_rwlock_rdunlock(&ht->rw_lock); + parsec_biased_rwlock_rdunlock(ht->rw_lock); if( resize ) { - parsec_atomic_rwlock_wrlock(&ht->rw_lock); + 
parsec_biased_rwlock_wrlock(ht->rw_lock); if( cur_head == ht->rw_hash ) { /* Barring ABA problems, nobody resized the hash table; * Good enough hint that it's our role to do so */ parsec_hash_table_resize(ht); } /* Otherwise, let's asssume somebody resized already */ - parsec_atomic_rwlock_wrunlock(&ht->rw_lock); + parsec_biased_rwlock_wrunlock(ht->rw_lock); } } @@ -535,7 +534,7 @@ void parsec_hash_table_insert_impl(parsec_hash_table_t *ht, parsec_hash_table_it uint64_t hash; parsec_hash_table_head_t *cur_head; int resize = 0; - parsec_atomic_rwlock_rdlock(&ht->rw_lock); + parsec_biased_rwlock_rdlock(ht->rw_lock); cur_head = ht->rw_hash; hash = parsec_hash_table_universal_rehash(ht->key_functions.key_hash(item->key, ht->hash_data), ht->rw_hash->nb_bits); assert( hash < (1ULL<rw_hash->nb_bits) ); @@ -553,17 +552,17 @@ void parsec_hash_table_insert_impl(parsec_hash_table_t *ht, parsec_hash_table_it } } parsec_atomic_unlock(&ht->rw_hash->buckets[hash].lock); - parsec_atomic_rwlock_rdunlock(&ht->rw_lock); + parsec_biased_rwlock_rdunlock(ht->rw_lock); if( resize ) { - parsec_atomic_rwlock_wrlock(&ht->rw_lock); + parsec_biased_rwlock_wrlock(ht->rw_lock); if( cur_head == ht->rw_hash ) { /* Barring ABA problems, nobody resized the hash table; * Good enough hint that it's our role to do so */ parsec_hash_table_resize(ht); } /* Otherwise, let's asssume somebody resized already */ - parsec_atomic_rwlock_wrunlock(&ht->rw_lock); + parsec_biased_rwlock_wrunlock(ht->rw_lock); } } @@ -571,13 +570,13 @@ void *parsec_hash_table_find(parsec_hash_table_t *ht, parsec_key_t key) { uint64_t hash; void *ret; - parsec_atomic_rwlock_rdlock(&ht->rw_lock); + parsec_biased_rwlock_rdlock(ht->rw_lock); hash = parsec_hash_table_universal_rehash(ht->key_functions.key_hash(key, ht->hash_data), ht->rw_hash->nb_bits); assert( hash < (1ULL<rw_hash->nb_bits) ); parsec_atomic_lock(&ht->rw_hash->buckets[hash].lock); ret = parsec_hash_table_nolock_find(ht, key); 
parsec_atomic_unlock(&ht->rw_hash->buckets[hash].lock); - parsec_atomic_rwlock_rdunlock(&ht->rw_lock); + parsec_biased_rwlock_rdunlock(ht->rw_lock); return ret; } @@ -585,13 +584,13 @@ void *parsec_hash_table_remove(parsec_hash_table_t *ht, parsec_key_t key) { uint64_t hash; void *ret; - parsec_atomic_rwlock_rdlock(&ht->rw_lock); + parsec_biased_rwlock_rdlock(ht->rw_lock); hash = parsec_hash_table_universal_rehash(ht->key_functions.key_hash(key, ht->hash_data), ht->rw_hash->nb_bits); assert( hash < (1ULL<rw_hash->nb_bits) ); parsec_atomic_lock(&ht->rw_hash->buckets[hash].lock); ret = parsec_hash_table_nolock_remove(ht, key); parsec_atomic_unlock(&ht->rw_hash->buckets[hash].lock); - parsec_atomic_rwlock_rdunlock(&ht->rw_lock); + parsec_biased_rwlock_rdunlock(ht->rw_lock); return ret; } diff --git a/parsec/class/parsec_hash_table.h b/parsec/class/parsec_hash_table.h index 83339f310..7ea36cf3a 100644 --- a/parsec/class/parsec_hash_table.h +++ b/parsec/class/parsec_hash_table.h @@ -11,6 +11,7 @@ #include "parsec/sys/atomic.h" #include "parsec/class/list_item.h" #include "parsec/class/parsec_rwlock.h" +#include "parsec/class/parsec_biased_rwlock.h" /** * @defgroup parsec_internal_classes_hashtable Hash Tables @@ -74,7 +75,7 @@ typedef struct parsec_hash_table_head_s { */ struct parsec_hash_table_s { parsec_object_t super; /**< A Hash Table is a PaRSEC object */ - parsec_atomic_rwlock_t rw_lock; /**< 'readers' are threads that manipulate rw_hash (add, delete, find) + parsec_biased_rwlock_t *rw_lock; /**< 'readers' are threads that manipulate rw_hash (add, delete, find) * but do not resize it; 'writers' are threads that resize * rw_hash */ int64_t elt_hashitem_offset; /**< Elements belonging to this hash table have a parsec_hash_table_item_t