/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */
#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"
#if KMP_USE_BGET
/* Callback types installed through bectl(): a compaction hook, a raw memory
   acquisition function, and a raw memory release function. */
typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);
/* bufsize is the integer type used for buffer sizes and relative links.
   On Windows it is kmp_int32 for x86/ARM and kmp_int64 otherwise; on every
   other OS it is ssize_t. */
#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif
/* Free-list search policy stored in thr_data_t::mode. bget() walks a bin
   through the back links in lifo mode and through the forward links
   otherwise; in best mode it first selects the smallest block in the bin
   that is large enough. */
typedef enum bget_mode {
bget_mode_fifo = 0,
bget_mode_lifo = 1,
bget_mode_best = 2
} bget_mode_t;
/* Forward declarations of the BGET primitives defined later in this file. */
static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
bget_acquire_t acquire, bget_release_t release,
bufsize pool_incr);
/* BGET CONFIGURATION */

/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* NOTE(review): the first line of the following comment was lost in this
   copy; it explains why the quantum is only 8 on IA-32 (malloc() does not
   ensure 16 byte alignment there) -- confirm against the upstream source. */
#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
#define SizeQuant 8
#define AlignType double
#else
#define SizeQuant 16
#define AlignType _Quad
#endif

/* BufStats enables the allocation counters kept in thr_data_t and the
   bookkeeping that updates them in bget(), brel() and bpool(). */
#define BufStats 1

#ifdef KMP_DEBUG
/* Debug-only switches. BufDump, BufValid and DumpData are defined here but
   not referenced in the visible part of this file. */
#define BufDump 1
#define BufValid 1
#define DumpData 1
#ifdef NOT_USED_NOW
/* FreeWipe makes brel()/bpool() fill released space with 0x55 so bfreed()
   can detect stores into free blocks. BestFit is not referenced in the
   visible part of this file. */
#define FreeWipe 1
#define BestFit 1
#endif
#endif
/* Size thresholds used by bget_get_bin() to choose a free-list bin. The
   entries are 0 followed by ascending powers of two from 1 << 7 up to
   1 << 25. */
static bufsize bget_bin_size[] = {
0,
1 << 7,
1 << 8,
1 << 9,
1 << 10,
1 << 11,
1 << 12, 1 << 13,
1 << 14, 1 << 15,
1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
1 << 21,
1 << 22,
1 << 23,
1 << 24,
1 << 25,
};
/* Number of bins, i.e. the number of entries in bget_bin_size[]. */
#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
struct bfhead;
/* Forward and backward links that chain free buffers into a doubly linked
   list. */
typedef struct qlinks {
struct bfhead *flink;
struct bfhead *blink;
} qlinks_t;
/* Header present in both allocated and free buffers. */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated;
                    0 marks a directly acquired buffer and ESent marks the
                    dummy block at the end of a pool. */
} bhead2_t;
/* Buffer header padded and aligned to the allocation quantum: the b_pad
   member rounds sizeof(bhead2_t) up to the next multiple of SizeQuant, and
   b_align forces AlignType alignment. */
typedef union bhead {
KMP_ALIGN(SizeQuant)
AlignType b_align;
char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
bhead2_t bb;
} bhead_t;
/* Reinterpret an address as a pointer to a buffer header. */
#define BH(p) ((bhead_t *)(p))
/* Header of a directly acquired buffer (one obtained straight from the
   acquisition function by bget()): tsize records the total size requested
   from the acquisition function, followed by a regular header whose bsize
   is 0. */
typedef struct bdhead {
bufsize tsize;
bhead_t bh;
} bdhead_t;
/* Reinterpret an address as a pointer to a direct-buffer header. */
#define BDH(p) ((bdhead_t *)(p))
/* Header of a free buffer: the common header followed by the free-list
   links. bget() hands out the address of ql as the user buffer when a whole
   free block is allocated. */
typedef struct bfhead {
bhead_t bh;
qlinks_t ql;
} bfhead_t;
/* Reinterpret an address as a pointer to a free-buffer header. */
#define BFH(p) ((bfhead_t *)(p))
/* Per-thread allocator state, reachable through th->th.th_local.bget_data. */
typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS]; /* One circular free list per size bin */
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif
  /* Automatic expansion block management functions (set by bectl()) */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;
  bget_mode_t mode; /* Free-list search mode used by bget() */
  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool block kept by brel(); it is released in
                          __kmp_finalize_bget() */
} thr_data_t;
/* Minimum allocation quantum: a buffer must at least be able to hold the
   free-list links. */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
/* Maximum for the requested size: the sign bit and the low SizeQuant bits
   are cleared. */
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */
#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
/* Map a buffer size to a free-list bin index by a binary chop over
   bget_bin_size[]. The search stops as soon as the window [low, high] is at
   most two entries wide and returns its lower end. */
static int bget_get_bin(bufsize size) {
  int low = 0;
  int high = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while (high - low > 1) {
    const int probe = (low + high) >> 1;
    if (size >= bget_bin_size[probe]) {
      low = probe;
    } else {
      high = probe - 1;
    }
  }

  KMP_DEBUG_ASSERT((low >= 0) && (low < MAX_BGET_BINS));
  return low;
}
/* Create (or reuse) and reset the allocator state of thread th: the state is
   zero-filled, every free-list bin is made an empty circular list, the
   cross-thread release list is cleared and, when a lock protects that list,
   the lock is initialised. */
static void set_thr_data(kmp_info_t *th) {
  thr_data_t *state = (thr_data_t *)th->th.th_local.bget_data;
  if (!state) {
    // First use by this thread: allocate the state block.
    state = (thr_data_t *)__kmp_allocate(sizeof(*state));
  }

  memset(state, '\0', sizeof(*state));

  // An empty bin is a list head that links to itself in both directions.
  for (int bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *head = &state->freelist[bin];
    head->ql.flink = head;
    head->ql.blink = head;
  }

  th->th.th_local.bget_data = state;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif
}
/* Return the allocator state attached to thread th; debug builds assert
   that it has been set up. */
static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *const state = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(state != 0);

  return state;
}
/* Release every buffer that has been queued on th's bget_list (see
   __kmp_bget_enqueue()). The whole list is detached from
   th->th.th_local.bget_list first -- with a compare-and-store loop or under
   bget_lock, depending on the build flags -- and each detached buffer is
   then passed to brel(). */
static void __kmp_bget_dequeue(kmp_info_t *th) {
void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
// Nothing to do when the list is empty.
if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
{
// Swap the list head with nullptr; retry until the store succeeds.
volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
CCAST(void *, old_value), nullptr)) {
KMP_CPU_PAUSE();
old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
}
p = CCAST(void *, old_value);
}
#else
#ifdef USE_QUEUING_LOCK_FOR_BGET
__kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
__kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
// Take the list and leave an empty one behind while holding the lock.
p = (void *)th->th.th_local.bget_list;
th->th.th_local.bget_list = 0;
#ifdef USE_QUEUING_LOCK_FOR_BGET
__kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
__kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif
// Check again: the detached list may be empty.
while (p != 0) {
void *buf = p;
// The buffer header sits immediately before the user pointer.
bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
// The low bit of bthr is masked off before comparing with th.
KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
(kmp_uintptr_t)th);
KMP_DEBUG_ASSERT(b->ql.blink == 0);
// Read the next link before brel() reuses the link fields.
p = (void *)b->ql.flink;
brel(th, buf);
}
}
}
/* Push buf onto the bget_list of thread th so that th releases it later in
   __kmp_bget_dequeue(). brel() calls this when the buffer's owning thread is
   not the releasing thread. The list is singly linked through ql.flink;
   ql.blink is cleared. With USE_QUEUING_LOCK_FOR_BGET, rel_gtid is the gtid
   passed to the lock routines. */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // low bit of bthr is masked off

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  // Link the buffer in front of the current list head while holding the lock.
  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif
}
/* Append free buffer b to the tail of the bin of thr's free list that
   bget_get_bin() selects for b's size. */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bfhead_t *const head = &thr->freelist[bget_get_bin(b->bh.bb.bsize)];

  // The bin must be a consistent circular list before it is modified.
  KMP_DEBUG_ASSERT(head->ql.blink->ql.flink == head);
  KMP_DEBUG_ASSERT(head->ql.flink->ql.blink == head);

  // Splice b between the current tail and the list head.
  bfhead_t *const tail = head->ql.blink;
  b->ql.flink = head;
  b->ql.blink = tail;
  head->ql.blink = b;
  tail->ql.flink = b;
}
/* Unlink free buffer b from whichever free list it is on by joining its two
   neighbours. b's own link fields are left untouched. */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  bfhead_t *const before = b->ql.blink;
  bfhead_t *const after = b->ql.flink;

  KMP_DEBUG_ASSERT(before->ql.flink == b);
  KMP_DEBUG_ASSERT(after->ql.blink == b);

  before->ql.flink = after;
  after->ql.blink = before;
}
/* Collect free-space statistics for thread th.
   *total_free receives the sum over all free blocks of (block size minus
   header size); *max_free receives the usable size of the largest free
   block, or 0 when there is none.

   Fix: the previous code remembered the SMALLEST block of every bin (a
   best-fit style comparison) and reported the maximum of those, so
   *max_free under-reported whenever a bin held blocks of different sizes.
   Every block is now compared against the running maximum. */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *head = &thr->freelist[bin];
    bfhead_t *b;

    // Walk the circular list until we are back at the bin's list head.
    for (b = head->ql.flink; b != head; b = b->ql.flink) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if (*max_free < b->bh.bb.bsize)
        *max_free = b->bh.bb.bsize;
    }
  }

  // Report usable bytes: strip the header from the largest block found.
  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}
/* BGET -- Allocate a buffer of at least requested_size bytes from the pool
   of thread th.
   The request is raised to at least SizeQ, rounded up to a multiple of
   SizeQuant and extended by one header. The free-list bins are searched
   starting at the bin for that size; a block is split when the remainder can
   hold a header plus SizeQ, otherwise it is handed out whole. If nothing
   fits, the compaction callback (if any) may request a retry, and finally the
   acquisition callback (if any) is used either for a direct buffer or for a
   new pool block.
   Returns the user pointer, or NULL when the request is negative or too
   large or no memory could be obtained. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For best-fit mode */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */
  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to first buffer of the bin in search direction */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the smallest buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. We only split the
          // buffer if enough room remains for a header plus the minimum
          // quantum of allocation; otherwise the caller gets all of it.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            // The allocated part is carved from the END of the free block.
            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th);
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // The remainder shrank: unlink it and reinsert it so that it
            // lands in the bin matching its new size.
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */
            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Store this thread in the header of the following buffer. */
            TCW_PTR(ba->bb.bthr, th);
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */
    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. Fall back to the
     acquisition function, if one is installed. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th);
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        // Retry with the ORIGINAL request now that a fresh pool block exists.
        buf = bget(
            th, requested_size);
        return buf;
      }
    }
  }

  /* Still no buffer available */
  return NULL;
}
the entire contents of the buffer to zero, not just the
region requested by the caller. */
static void *bgetz(kmp_info_t *th, bufsize size) {
char *buf = (char *)bget(th, size);
if (buf != NULL) {
bhead_t *b;
bufsize rsize;
b = BH(buf - sizeof(bhead_t));
rsize = -(b->bb.bsize);
if (rsize == 0) {
bdhead_t *bd;
bd = BDH(buf - sizeof(bdhead_t));
rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
} else {
rsize -= sizeof(bhead_t);
}
KMP_DEBUG_ASSERT(rsize >= size);
(void)memset(buf, 0, (bufsize)rsize);
}
return ((void *)buf);
}
simply in terms of brel() and bget(). It could be
enhanced to allow the buffer to grow into adjacent free
blocks and to avoid moving data unnecessarily. */
static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
void *nbuf;
bufsize osize;
bhead_t *b;
nbuf = bget(th, size);
if (nbuf == NULL) {
return NULL;
}
if (buf == NULL) {
return nbuf;
}
b = BH(((char *)buf) - sizeof(bhead_t));
osize = -b->bb.bsize;
if (osize == 0) {
bdhead_t *bd;
bd = BDH(((char *)buf) - sizeof(bdhead_t));
osize = bd->tsize - (bufsize)sizeof(bdhead_t);
} else {
osize -= sizeof(bhead_t);
}
KMP_DEBUG_ASSERT(osize > 0);
(void)KMP_MEMCPY((char *)nbuf, (char *)buf,
(size_t)((size < osize) ? size : osize));
brel(th, buf);
return nbuf;
}
/* BREL -- Release a buffer previously returned by bget()/bgetz()/bgetr().
   th is the releasing thread. Directly acquired buffers (bsize == 0) go
   straight to the release function. Buffers owned by another thread are
   queued on that thread with __kmp_bget_enqueue(). Otherwise the buffer is
   merged with free neighbours, put on the free list and, when it spans a
   whole pool block, possibly handed back to the release function. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  // Mask off the low bit of bthr before comparing with the releasing thread.
  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1);
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */
  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize; // bsize is negative here
#endif

  /* If the back link is nonzero, the previous buffer is free. */
  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */
  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      // Remember the last pool block; __kmp_finalize_bget() releases it.
      thr->last_pool = b;
    }
#endif
  }
}
/* BECTL -- Install the automatic pool expansion controls of thread th:
   the compaction, acquisition and release callbacks and the size of an
   expansion block. */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *const state = get_thr_data(th);

  state->exp_incr = pool_incr;
  state->relfcn = release;
  state->acqfcn = acquire;
  state->compfcn = compact;
}
/* BPOOL -- Add the memory region [buf, buf + len) to the buffer pool of
   thread th. len is truncated to a multiple of SizeQuant; the region becomes
   one free buffer followed by a dummy allocated header (bsize == ESent) that
   marks the end of the pool block. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1));
#endif
  // Track whether every pool block so far has had the same length.
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer; the low bit of the stored pointer is set
     here and masked off again wherever bthr is read. */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1));

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
/* BFREED -- Print the allocation counters (when BufStats is enabled) and
   one line per free block of thread th. With FreeWipe, also report free
   blocks whose 0x55 fill pattern has been overwritten. */
static void bfreed(kmp_info_t *th) {
  int free_blocks = 0;
  const int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *const thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (int bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *const head = &thr->freelist[bin];
    bfhead_t *b = head->ql.flink;

    // Visit every block on this bin's circular list.
    while (b != head) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      ++free_blocks;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        // The wiped area starts right after the free-buffer header.
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
      b = b->ql.flink;
    }
  }

  if (free_blocks == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}
/* Set up the BGET allocator for thread th: reset its state and install
   malloc/free as the acquisition/release functions, no compaction function,
   and __kmp_malloc_pool_incr as the expansion block size. */
void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  const bufsize expansion = (bufsize)__kmp_malloc_pool_incr;
  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        expansion);
}
/* Tear down the BGET allocator of thread th: release the last pool block
   that brel() deliberately kept (BufStats builds only) and free the
   per-thread allocator state. */
void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists; brel() leaves it for us.
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}
void kmpc_set_poolsize(size_t size) {
bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
(bget_release_t)free, (bufsize)size);
}
/* Return the expansion block size of the calling thread's pool. */
size_t kmpc_get_poolsize(void) {
  return get_thr_data(__kmp_get_thread())->exp_incr;
}
/* Select the free-list search mode of the calling thread's pool. Values
   other than the three bget_mode_* constants are ignored. */
void kmpc_set_poolmode(int mode) {
  switch (mode) {
  case bget_mode_fifo:
  case bget_mode_lifo:
  case bget_mode_best:
    get_thr_data(__kmp_get_thread())->mode = (bget_mode_t)mode;
    break;
  default:
    // Unknown mode: leave the current setting untouched.
    break;
  }
}
/* Return the free-list search mode of the calling thread's pool. */
int kmpc_get_poolmode(void) {
  return get_thr_data(__kmp_get_thread())->mode;
}
void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
kmp_info_t *th = __kmp_get_thread();
bufsize a, b;
__kmp_bget_dequeue(th);
bcheck(th, &a, &b);
*maxmem = a;
*allmem = b;
}
void kmpc_poolprint(void) {
kmp_info_t *th = __kmp_get_thread();
__kmp_bget_dequeue(th);
bfreed(th);
}
#endif
void *kmpc_malloc(size_t size) {
void *ptr;
ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
if (ptr != NULL) {
*(void **)ptr = ptr;
ptr = (void **)ptr + 1;
}
return ptr;
}
#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
void *kmpc_aligned_malloc(size_t size, size_t alignment) {
void *ptr;
void *ptr_allocated;
KMP_DEBUG_ASSERT(alignment < 32 * 1024);
if (!IS_POWER_OF_TWO(alignment)) {
errno = EINVAL;
return NULL;
}
size = size + sizeof(void *) + alignment;
ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
if (ptr_allocated != NULL) {
ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
~(alignment - 1));
*((void **)ptr - 1) = ptr_allocated;
} else {
ptr = NULL;
}
return ptr;
}
void *kmpc_calloc(size_t nelem, size_t elsize) {
void *ptr;
ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
if (ptr != NULL) {
*(void **)ptr = ptr;
ptr = (void **)ptr + 1;
}
return ptr;
}
void *kmpc_realloc(void *ptr, size_t size) {
void *result = NULL;
if (ptr == NULL) {
result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
if (result != NULL) {
*(void **)result = result;
result = (void **)result + 1;
}
} else if (size == 0) {
KMP_ASSERT(*((void **)ptr - 1));
brel(__kmp_get_thread(), *((void **)ptr - 1));
} else {
result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
(bufsize)(size + sizeof(ptr)));
if (result != NULL) {
*(void **)result = result;
result = (void **)result + 1;
}
}
return result;
}
void kmpc_free(void *ptr) {
if (!__kmp_init_serial) {
return;
}
if (ptr != NULL) {
kmp_info_t *th = __kmp_get_thread();
__kmp_bget_dequeue(th);
KMP_ASSERT(*((void **)ptr - 1));
brel(th, *((void **)ptr - 1));
}
}
/* Allocate size bytes from the pool of thread th (traced wrapper around
   bget()). Returns NULL on failure. */
void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  void *const block = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", block));
  return block;
}
/* Allocate a zero-filled array of nelem elements of elsize bytes each from
   the pool of thread th (traced wrapper around bgetz()).
   Returns NULL on failure, including when nelem * elsize does not fit in
   size_t; previously the product could wrap and yield a buffer smaller than
   requested. */
void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  if (elsize != 0 && nelem > ~(size_t)0 / elsize) {
    // The element count times the element size would overflow size_t.
    ptr = NULL;
  } else {
    ptr = bgetz(th, (bufsize)(nelem * elsize));
  }
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}
/* Resize a buffer of thread th's pool (traced wrapper around bgetr()).
   Returns the new buffer, or NULL when bgetr() fails. */
void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  void *const resized = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", resized));
  return resized;
}
/* Release a buffer through thread th (traced wrapper around brel()). A NULL
   ptr is ignored; otherwise buffers queued on th are released first. */
void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  const int have_buffer = (ptr != NULL);
  if (have_buffer) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}
/* State of the optional memkind library. All of these are filled in by
   __kmp_init_memkind() and reset to NULL by __kmp_fini_memkind(). */
// Name of the memkind shared library ("" when memkind is not supported).
static const char *kmp_mk_lib_name;
// dlopen() handle of the memkind library, or NULL.
static void *h_memkind;
// Entry points resolved from the library: memkind_malloc, memkind_free and
// memkind_check_available.
static void *(*kmp_mk_alloc)(void *k, size_t sz);
static void (*kmp_mk_free)(void *kind, void *ptr);
static int (*kmp_mk_check)(void *kind);
// Addresses of the library's MEMKIND_* kind objects; NULL when the symbol is
// missing or kmp_mk_check() reports the kind as unavailable.
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
/* Target-memory entry points (llvm_omp_target_*), looked up by
   __kmp_init_target_mem(). */
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
static void *(*kmp_target_unlock_mem)(void *ptr, int device);
static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
// True when all six alloc/free target entry points were found.
static bool __kmp_target_mem_available;
/* True when MS is one of the llvm_omp_target_* memory spaces. The argument
   is parenthesized so that expression arguments (e.g. a conditional
   expression) are compared as a whole. */
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  ((MS) == llvm_omp_target_host_mem_space ||                                   \
   (MS) == llvm_omp_target_shared_mem_space ||                                 \
   (MS) == llvm_omp_target_device_mem_space)

/* True when MA is one of the predefined llvm_omp_target_* allocators. */
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  ((MA) == llvm_omp_target_host_mem_alloc ||                                   \
   (MA) == llvm_omp_target_shared_mem_alloc ||                                 \
   (MA) == llvm_omp_target_device_mem_alloc)
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
/* Clear *pkind when the kind symbol was found but kmp_mk_check() returns
   non-zero for it. A NULL *pkind (symbol not found) is left as is. */
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind != NULL && kmp_mk_check(**pkind)) {
    *pkind = NULL;
  }
}
#endif
/* Try to load libmemkind.so and resolve its entry points and kinds.
   On success __kmp_memkind_available is set to 1 and the function returns
   with all handles populated (optional kinds that are missing or unavailable
   are NULL). On any failure -- or on platforms where memkind is not used --
   every memkind handle is reset to NULL. */
void __kmp_init_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind != NULL) {
    // Mandatory entry points and the default kind.
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");

    const bool core_ok = kmp_mk_check && kmp_mk_alloc && kmp_mk_free &&
                         mk_default && !kmp_mk_check(*mk_default);
    if (!core_ok) {
      // Unusable library: unload it and fall through to the reset below.
      dlclose(h_memkind);
    } else {
      __kmp_memkind_available = 1;

      // Optional kinds: look each one up, then drop it if it is unavailable.
      static const struct {
        void ***slot;
        const char *symbol;
      } optional_kinds[] = {
          {&mk_interleave, "MEMKIND_INTERLEAVE"},
          {&mk_hbw, "MEMKIND_HBW"},
          {&mk_hbw_interleave, "MEMKIND_HBW_INTERLEAVE"},
          {&mk_hbw_preferred, "MEMKIND_HBW_PREFERRED"},
          {&mk_hugetlb, "MEMKIND_HUGETLB"},
          {&mk_hbw_hugetlb, "MEMKIND_HBW_HUGETLB"},
          {&mk_hbw_preferred_hugetlb, "MEMKIND_HBW_PREFERRED_HUGETLB"},
          {&mk_dax_kmem, "MEMKIND_DAX_KMEM"},
          {&mk_dax_kmem_all, "MEMKIND_DAX_KMEM_ALL"},
          {&mk_dax_kmem_preferred, "MEMKIND_DAX_KMEM_PREFERRED"},
      };
      const size_t n_kinds = sizeof(optional_kinds) / sizeof(optional_kinds[0]);
      for (size_t k = 0; k < n_kinds; ++k) {
        *optional_kinds[k].slot =
            (void **)dlsym(h_memkind, optional_kinds[k].symbol);
        chk_kind(optional_kinds[k].slot);
      }

      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
  }
#else
  kmp_mk_lib_name = "";
#endif
  // memkind is not usable: clear every handle.
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}
/* Unload the memkind library, if loaded, and reset every memkind handle to
   NULL. Compiles to an empty function unless KMP_OS_UNIX && KMP_DYNAMIC_LIB.
   __kmp_memkind_available itself is not modified here. */
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available) {
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  }
  if (h_memkind != NULL) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }

  // Entry points.
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;

  // Kinds.
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}
/* Look up the llvm_omp_target_* memory entry points with KMP_DLSYM and
   record in __kmp_target_mem_available whether all six alloc/free routines
   were found. The lock/unlock routines are looked up too but do not affect
   availability. */
void __kmp_init_target_mem() {
  // Allocation routines.
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");

  // Release routines.
  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
  *(void **)(&kmp_target_free_shared) =
      KMP_DLSYM("llvm_omp_target_free_shared");
  *(void **)(&kmp_target_free_device) =
      KMP_DLSYM("llvm_omp_target_free_device");

  const bool have_alloc = kmp_target_alloc_host && kmp_target_alloc_shared &&
                          kmp_target_alloc_device;
  const bool have_free = kmp_target_free_host && kmp_target_free_shared &&
                         kmp_target_free_device;
  __kmp_target_mem_available = have_alloc && have_free;

  // lock/pin and unlock/unpin target calls
  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
}
/* Create an allocator for memory space ms configured by the ntraits entries
   of traits[].
   Returns the new allocator handle, or omp_null_allocator (after freeing the
   descriptor) when the requested memory space cannot be served:
   - with memkind: high-bandwidth or large-capacity space without a suitable
     kind;
   - without memkind: a target memory space while target memory support is
     unavailable, or the high-bandwidth space.
   gtid is not used by the visible code. */
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));

  // NOTE(review): the al->fb == 0 test below presumably relies on
  // __kmp_allocate() returning zero-filled memory -- confirm.
  kmp_allocator_t *al =
      (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t));
  al->memspace = ms;

  // Apply the traits in order; later entries override earlier ones.
  for (int t = 0; t < ntraits; ++t) {
    omp_alloctrait_t *trait = &traits[t];
    switch (trait->key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
      // Accepted but ignored.
      break;
    case omp_atk_pinned:
      al->pinned = true;
      break;
    case omp_atk_alignment:
      __kmp_type_convert(trait->value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = trait->value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)trait->value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, trait->value);
      break;
    case omp_atk_partition:
      al->memkind = RCAST(void **, trait->value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }

  // Fallback policy: an unset policy becomes default_mem_fb, which always
  // falls back to omp_default_mem_alloc; allocator_fb needs fb_data.
  if (al->fb == 0) {
    al->fb = omp_atv_default_mem_fb;
  }
  if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  }

  bool supported = true;
  if (__kmp_memkind_available) {
    // Use the memkind library: translate the request into a kind.
    const bool interleaved = (al->memkind == (void *)omp_atv_interleaved);
    if (ms == omp_high_bw_mem_space) {
      if (interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        al->memkind = mk_hbw_preferred;
      } else {
        supported = false; // HBW requested but not available
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        al->memkind = mk_dax_kmem;
      } else {
        supported = false;
      }
    } else {
      al->memkind = (interleaved && mk_interleave) ? mk_interleave : mk_default;
    }
  } else {
    if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
      supported = false;
    } else if (ms == omp_high_bw_mem_space) {
      supported = false; // HBW is only served through memkind
    }
  }

  if (!supported) {
    __kmp_free(al);
    return omp_null_allocator;
  }
  return (omp_allocator_handle_t)al;
}
/* Free an allocator created by __kmpc_init_allocator(). Handles that do not
   compare greater than kmp_max_mem_alloc are left alone. gtid is unused. */
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator <= kmp_max_mem_alloc) {
    return;
  }
  __kmp_free(allocator);
}
void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
if (allocator == omp_null_allocator)
allocator = omp_default_mem_alloc;
__kmp_threads[gtid]->th.th_def_allocator = allocator;
}
/* Return the default allocator currently recorded for thread gtid. */
omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  omp_allocator_handle_t current = __kmp_threads[gtid]->th.th_def_allocator;
  return current;
}
// Descriptor that __kmp_alloc() stores immediately below the pointer it
// returns; __kmp_realloc() and ___kmpc_free() read it back from there.
typedef struct kmp_mem_desc {
// Pointer returned by the underlying allocator; the one handed back on free.
void *ptr_alloc;
// Size of the underlying block: requested size + descriptor + alignment.
size_t size_a;
// Size originally requested by the caller.
size_t size_orig;
// Aligned pointer that was returned to the caller.
void *ptr_align;
// Allocator that actually served the request (may be a fallback allocator).
kmp_allocator_t *allocator;
} kmp_mem_desc_t;
// Minimum alignment __kmp_alloc() starts from; allocator traits or an explicit
// alignment request can only raise it.
static int alignment = sizeof(void *);
/* Traced wrapper around __kmp_alloc() that passes an alignment request of 0.
   Returns whatever __kmp_alloc() returns. */
void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
  const size_t no_alignment_request = 0;
  void *result = __kmp_alloc(gtid, no_alignment_request, size, allocator);
  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", result, gtid));
  return result;
}
/* Traced wrapper around __kmp_alloc() that forwards the caller's alignment
   request algn unchanged. Returns whatever __kmp_alloc() returns. */
void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
                           omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
                (int)size, allocator));
  void *result = __kmp_alloc(gtid, algn, size, allocator);
  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", result, gtid));
  return result;
}
/* Traced wrapper around __kmp_calloc() that passes an alignment request of 0.
   Returns whatever __kmp_calloc() returns. */
void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
                    omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
                (int)size, allocator));
  const size_t no_alignment_request = 0;
  void *result =
      __kmp_calloc(gtid, no_alignment_request, nmemb, size, allocator);
  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", result, gtid));
  return result;
}
/* Traced wrapper around __kmp_realloc().
   allocator serves the new block; free_allocator is passed along for
   releasing the old one. Returns whatever __kmp_realloc() returns. */
void *__kmpc_realloc(int gtid, void *ptr, size_t size,
                     omp_allocator_handle_t allocator,
                     omp_allocator_handle_t free_allocator) {
  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
                allocator, free_allocator));
  void *resized = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", resized, gtid));
  return resized;
}
/* Traced wrapper around ___kmpc_free(): logs the request, releases ptr, and
   logs completion. */
void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));

  ___kmpc_free(gtid, ptr, allocator);

  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
}
void *__kmp_alloc(int gtid, size_t algn, size_t size,
omp_allocator_handle_t allocator) {
void *ptr = NULL;
kmp_allocator_t *al;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (size == 0)
return NULL;
if (allocator == omp_null_allocator)
allocator = __kmp_threads[gtid]->th.th_def_allocator;
kmp_int32 default_device =
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
al = RCAST(kmp_allocator_t *, allocator);
int sz_desc = sizeof(kmp_mem_desc_t);
kmp_mem_desc_t desc;
kmp_uintptr_t addr;
kmp_uintptr_t addr_align;
kmp_uintptr_t addr_descr;
size_t align = alignment;
if (allocator > kmp_max_mem_alloc && al->alignment > align)
align = al->alignment;
if (align < algn)
align = algn;
desc.size_orig = size;
desc.size_a = size + sz_desc + align;
bool is_pinned = false;
if (allocator > kmp_max_mem_alloc)
is_pinned = al->pinned;
int use_default_allocator = (__kmp_memkind_available) ? false : true;
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
if (__kmp_target_mem_available) {
kmp_int32 device =
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
if (allocator == llvm_omp_target_host_mem_alloc)
ptr = kmp_target_alloc_host(size, device);
else if (allocator == llvm_omp_target_shared_mem_alloc)
ptr = kmp_target_alloc_shared(size, device);
else
ptr = kmp_target_alloc_device(size, device);
return ptr;
} else {
KMP_INFORM(TargetMemNotAvailable);
}
}
if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
if (__kmp_target_mem_available) {
kmp_int32 device =
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
if (al->memspace == llvm_omp_target_host_mem_space)
ptr = kmp_target_alloc_host(size, device);
else if (al->memspace == llvm_omp_target_shared_mem_space)
ptr = kmp_target_alloc_shared(size, device);
else
ptr = kmp_target_alloc_device(size, device);
return ptr;
} else {
KMP_INFORM(TargetMemNotAvailable);
}
}
if (__kmp_memkind_available) {
if (allocator < kmp_max_mem_alloc) {
if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
} else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
} else {
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
}
} else if (al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
} else {
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
}
}
} else {
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
}
}
} else if (allocator < kmp_max_mem_alloc) {
if (allocator == omp_high_bw_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
} else if (allocator == omp_large_cap_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
} else if (allocator == omp_const_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
} else if (allocator == omp_low_lat_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
} else if (allocator == omp_cgroup_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
} else if (allocator == omp_pteam_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
} else if (allocator == omp_thread_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
} else {
use_default_allocator = true;
}
if (use_default_allocator) {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
use_default_allocator = false;
}
} else if (al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
} else {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
}
}
} else {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
}
}
KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
if (ptr == NULL)
return NULL;
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, desc.size_a, default_device);
addr = (kmp_uintptr_t)ptr;
addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
addr_descr = addr_align - sz_desc;
desc.ptr_alloc = ptr;
desc.ptr_align = (void *)addr_align;
desc.allocator = al;
*((kmp_mem_desc_t *)addr_descr) = desc;
KMP_MB();
return desc.ptr_align;
}
void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
omp_allocator_handle_t allocator) {
void *ptr = NULL;
kmp_allocator_t *al;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (allocator == omp_null_allocator)
allocator = __kmp_threads[gtid]->th.th_def_allocator;
al = RCAST(kmp_allocator_t *, allocator);
if (nmemb == 0 || size == 0)
return ptr;
if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0);
}
return ptr;
}
ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
if (ptr) {
memset(ptr, 0x00, nmemb * size);
}
return ptr;
}
void *__kmp_realloc(int gtid, void *ptr, size_t size,
omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator) {
void *nptr = NULL;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (size == 0) {
if (ptr != NULL)
___kmpc_free(gtid, ptr, free_allocator);
return nptr;
}
nptr = __kmp_alloc(gtid, 0, size, allocator);
if (nptr != NULL && ptr != NULL) {
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align;
kmp_uintptr_t addr_descr;
addr_align = (kmp_uintptr_t)ptr;
addr_descr = addr_align - sizeof(kmp_mem_desc_t);
desc = *((kmp_mem_desc_t *)addr_descr);
KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
KMP_DEBUG_ASSERT(desc.size_orig > 0);
KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
KMP_MEMCPY((char *)nptr, (char *)ptr,
(size_t)((size < desc.size_orig) ? size : desc.size_orig));
}
if (nptr != NULL) {
___kmpc_free(gtid, ptr, free_allocator);
}
return nptr;
}
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
if (ptr == NULL)
return;
kmp_allocator_t *al;
omp_allocator_handle_t oal;
al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align;
kmp_uintptr_t addr_descr;
if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
(allocator > kmp_max_mem_alloc &&
KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
kmp_int32 device =
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
if (allocator == llvm_omp_target_host_mem_alloc) {
kmp_target_free_host(ptr, device);
} else if (allocator == llvm_omp_target_shared_mem_alloc) {
kmp_target_free_shared(ptr, device);
} else if (allocator == llvm_omp_target_device_mem_alloc) {
kmp_target_free_device(ptr, device);
}
return;
}
addr_align = (kmp_uintptr_t)ptr;
addr_descr = addr_align - sizeof(kmp_mem_desc_t);
desc = *((kmp_mem_desc_t *)addr_descr);
KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
if (allocator) {
KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
}
al = desc.allocator;
oal = (omp_allocator_handle_t)al;
KMP_DEBUG_ASSERT(al);
if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
kmp_int32 device =
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
kmp_target_unlock_mem(desc.ptr_alloc, device);
}
if (__kmp_memkind_available) {
if (oal < kmp_max_mem_alloc) {
if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
} else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
} else {
kmp_mk_free(*mk_default, desc.ptr_alloc);
}
} else {
if (al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used;
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
kmp_mk_free(*al->memkind, desc.ptr_alloc);
}
} else {
if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used;
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
__kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
}
}
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, used
   freed pointers, etc. */
// Descriptor stored immediately below every pointer returned by
// ___kmp_allocate_align(). The fast allocator (USE_FAST_MEMORY == 3) reuses
// the same layout but stores the owning thread in ptr_aligned and a chain
// length in size_allocated.
struct kmp_mem_descr {
// Pointer returned by the underlying malloc; the one handed to free.
void *ptr_allocated;
// Size of the underlying block, including descriptor and alignment slack.
size_t size_allocated;
// Aligned pointer given to client code.
void *ptr_aligned;
// Size of the aligned block available to client code.
size_t size_aligned;
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
/* Allocate memory on the requested boundary and fill the aligned block with
   0x00. NULL is NEVER returned, __kmp_abort() is called in case of memory
   allocation error. Must use __kmp_free when freeing memory allocated by this
   routine! */
static void *___kmp_allocate_align(size_t size,
size_t alignment KMP_SRC_LOC_DECL) {
requested to return properly aligned pointer. Original pointer returned
by malloc() and size of allocated block is saved in descriptor just
before the aligned pointer. This information used by __kmp_free() -- it
has to pass to free() original pointer, not aligned one.
+---------+------------+-----------------------------------+---------+
| padding | descriptor | aligned block | padding |
+---------+------------+-----------------------------------+---------+
^ ^
| |
| +- Aligned pointer returned to caller
+- Pointer returned by malloc()
Aligned block is filled with zeros, paddings are filled with 0xEF. */
kmp_mem_descr_t descr;
kmp_uintptr_t addr_allocated;
kmp_uintptr_t addr_aligned;
kmp_uintptr_t addr_descr;
KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
(int)size, (int)alignment KMP_SRC_LOC_PARM));
KMP_DEBUG_ASSERT(alignment < 32 * 1024);
KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
descr.size_aligned = size;
descr.size_allocated =
descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
#if KMP_DEBUG
descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
descr.ptr_allocated));
if (descr.ptr_allocated == NULL) {
KMP_FATAL(OutOfHeapMemory);
}
addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
addr_aligned =
(addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
descr.ptr_aligned = (void *)addr_aligned;
KE_TRACE(26, (" ___kmp_allocate_align: "
"ptr_allocated=%p, size_allocated=%d, "
"ptr_aligned=%p, size_aligned=%d\n",
descr.ptr_allocated, (int)descr.size_allocated,
descr.ptr_aligned, (int)descr.size_aligned));
KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
addr_allocated + descr.size_allocated);
KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
#endif
memset(descr.ptr_aligned, 0x00, descr.size_aligned);
*((kmp_mem_descr_t *)addr_descr) = descr;
KMP_MB();
KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
return descr.ptr_aligned;
}
/* Allocate memory aligned to __kmp_align_alloc and fill it with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));

  // Delegate to the aligned allocator using the runtime-wide alignment.
  void *block = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);

  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", block));
  return block;
}
/* Allocate memory on a page boundary and fill it with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  // Fixed 8 KiB boundary used as the page size for these allocations.
  int page_size = 8 * 1024;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));

  void *block = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);

  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, block));
  return block;
}
/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
kmp_mem_descr_t descr;
#if KMP_DEBUG
kmp_uintptr_t addr_allocated;
kmp_uintptr_t addr_aligned;
#endif
KE_TRACE(25,
("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
KMP_ASSERT(ptr != NULL);
descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
KE_TRACE(26, (" __kmp_free: "
"ptr_allocated=%p, size_allocated=%d, "
"ptr_aligned=%p, size_aligned=%d\n",
descr.ptr_allocated, (int)descr.size_allocated,
descr.ptr_aligned, (int)descr.size_aligned));
#if KMP_DEBUG
addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
addr_allocated + descr.size_allocated);
memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
#endif
#ifndef LEAK_MEMORY
KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
_free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
KMP_MB();
KE_TRACE(25, ("<- __kmp_free() returns\n"));
}
#if USE_FAST_MEMORY == 3
// Longest chain ___kmp_fast_free() accumulates for another thread before it
// hands the chain over to that thread's sync list.
#define KMP_FREE_LIST_LIMIT 16
// Granularity, in bytes, of the fast allocator's size classes and alignment.
#define DCACHE_LINE 128
/* Allocate size bytes for this_thr from the per-thread fast allocator.

   Requests are rounded up to one of four size classes of 2, 4, 16 or 64
   DCACHE_LINE units. A class is served from the thread's own free list first,
   then from the list other threads push returned blocks onto
   (th_free_list_sync), and only then from bget(). Larger requests always go to
   bget().

   For blocks handed out here the descriptor below the returned pointer holds
   the bget() pointer in ptr_allocated, the owning thread in ptr_aligned and
   the rounded size in size_aligned. Release with ___kmp_fast_free(). */
void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  size_t num_lines, idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  // NOTE(review): idx is unsigned, so this assertion can never fire; a size of
  // 0 wraps idx and ends up on the direct bget() path below.
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than a cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx was [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx was [ 4, 15 ], use third free list
    num_lines = 16; // 5 .. 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx was [ 16, 63 ], use fourth free list
    num_lines = 64; // 17 .. 64 cache lines
  } else {
    goto alloc_call; // too large for the free lists
  }

  // Own free list: no synchronisation needed.
  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // Pop the head; the next pointer is stored in the block's first word.
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

  // List filled by other threads: detach it as a whole with a CAS.
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // Keep the head for the caller; the rest becomes the own free list.
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

alloc_call:
  // Nothing cached: take a fresh block from bget().
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), (int)alloc_size));
  // NOTE(review): the bget() result is used without a NULL check.
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // Align to DCACHE_LINE, leaving room for the descriptor below the result.
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember the bget() pointer
  descr->ptr_aligned = (void *)this_thr; // remember the owning thread
  descr->size_aligned = size; // rounded size, identifies the size class

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
}
/* Release a block obtained from ___kmp_fast_allocate().

   this_thr - thread performing the free (not necessarily the owner).
   ptr      - block to release; must not be NULL.

   Blocks of one of the four cached size classes are pushed onto a free list:
   the caller's own list when it owns the block, otherwise a private
   "other" chain that collects blocks belonging to a single foreign thread.
   That chain is handed to its owner's sync list when a block for a different
   owner arrives or the chain would exceed KMP_FREE_LIST_LIMIT. Blocks of any
   other size go straight back to brel(). */
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // rounded size recorded at allocation time

  // Map the size back to its free-list index (2, 4, 16, 64 cache lines).
  idx = DCACHE_LINE * 2;
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // not cached, return to bget
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // owning thread
  if (alloc_thr == this_thr) {
    // Own block: push onto the own free list, no synchronisation needed.
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Start a new chain; size_allocated doubles as the chain length.
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL;
      descr->size_allocated = (size_t)1;
    } else {
      // Inspect the chain head for its owner and current length.
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz = dsc->size_allocated + 1; // length if ptr were added
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // Same owner and still room: prepend to the chain.
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // Different owner or chain full: hand the existing chain to its
        // owner's sync list, then start a new chain with ptr.
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // walk to the last block of the chain
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* The tail's next pointer must be set before the sync list head is
           switched to this chain, to avoid exposing a broken list to other
           threads, even for an instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // Start a fresh chain containing only ptr.
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1;
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), (int)size));
  __kmp_bget_dequeue(this_thr);
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
}
/* Reset all NUM_LISTS fast-allocator free lists of this_thr to zero. */
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  const size_t lists_bytes = NUM_LISTS * sizeof(kmp_free_list_t);
  memset(this_thr->th.th_free_lists, 0, lists_bytes);
}
// Walk every bget free-list bin of thread th, collect the buffers whose bthr
// field has its low bit set, and hand each of them to the thread's release
// function (thr->relfcn).
void __kmp_free_fast_memory(kmp_info_t *th) {
int bin;
thr_data_t *thr = get_thr_data(th);
// Head of a temporary singly linked list of buffers to release.
void **lst = NULL;
KE_TRACE(
5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
__kmp_bget_dequeue(th);
// Pass 1: gather candidates from all bins without releasing anything yet.
for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
bfhead_t *b = thr->freelist[bin].ql.flink;
while (b != &thr->freelist[bin]) {
// NOTE(review): the low bit of bthr presumably marks a buffer that can be
// given back through relfcn -- confirm against where bget sets it.
if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
// Chain through the buffer's first pointer-sized word; ql.flink is still
// read afterwards, so that word is presumably not part of ql -- confirm.
*((void **)b) =
lst;
lst = (void **)b;
}
b = b->ql.flink;
}
}
// Pass 2: release the gathered buffers; the next link is read before the
// buffer is handed to relfcn.
while (lst != NULL) {
void *next = *lst;
KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
lst, next, th, __kmp_gtid_from_thread(th)));
(*thr->relfcn)(lst);
#if BufStats
// Keep the release and block counters in step with each call to relfcn.
thr->numprel++;
thr->numpblk--;
#endif
lst = (void **)next;
}
KE_TRACE(
5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}
#endif