/*
 * kmp_str.cpp -- String manipulation routines.
 */
#include "kmp_str.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "kmp.h"
#include "kmp_i18n.h"
/* String buffer.

Usage:
// Declare buffer and initialize it.
kmp_str_buf_t buffer;
__kmp_str_buf_init( & buffer );
// Print to buffer.
__kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
__kmp_str_buf_print(& buffer, " <%s>\n", line);
// Use buffer contents. buffer.str is a pointer to data, buffer.used is a
// number of printed characters (not including terminating zero).
write( fd, buffer.str, buffer.used );
// Free buffer.
__kmp_str_buf_free( & buffer );
// Alternatively, you can detach allocated memory from buffer:
__kmp_str_buf_detach( & buffer );
return buffer.str; // That memory should be freed eventually.
Notes:
* Buffer users may use buffer.str and buffer.used. Users should not change
any fields of buffer directly.
* buffer.str is never NULL. If buffer is empty, buffer.str points to empty
string ("").
* For performance reasons, buffer uses stack memory (buffer.bulk) first. If
stack memory is exhausted, buffer allocates memory on heap by malloc(), and
reallocates it by realloc() as amount of used memory grows.
* Buffer doubles amount of allocated memory each time it is exhausted.
*/
/* Debug-only consistency check for the kmp_str_buf_t pointed to by `b`:
   - str is never NULL;
   - size is at least sizeof(bulk) and a multiple of it;
   - used is strictly smaller than size;
   - str points at bulk when size equals sizeof(bulk), and does not point at
     bulk when size is larger.
   The body is wrapped in do { ... } while (0) so that the macro expands to a
   single statement and stays valid inside an unbraced if/else. */
#define KMP_STR_BUF_INVARIANT(b)                                               \
  do {                                                                         \
    KMP_DEBUG_ASSERT((b)->str != NULL);                                        \
    KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk));                          \
    KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0);                      \
    KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size);                         \
    KMP_DEBUG_ASSERT(                                                          \
        (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1);       \
    KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \
                                                   : 1);                       \
  } while (0)
// Resets the buffer to an empty string. Neither the capacity nor the storage
// the buffer currently points at is changed.
void __kmp_str_buf_clear(kmp_str_buf_t *buffer) {
  KMP_STR_BUF_INVARIANT(buffer);
  // An already empty buffer needs no work.
  if (buffer->used > 0) {
    *buffer->str = '\0';
    buffer->used = 0;
  }
  KMP_STR_BUF_INVARIANT(buffer);
}
// Ensures the buffer has room for at least `size` bytes. When it does not,
// the capacity is doubled until it does; contents that still live in the
// inline bulk array are copied to a newly allocated block, otherwise the
// existing block is reallocated. Allocation failure is reported through
// KMP_FATAL(MemoryAllocFailed).
void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, size_t size) {
  KMP_STR_BUF_INVARIANT(buffer);
  KMP_DEBUG_ASSERT(size >= 0);

  unsigned int const wanted = (unsigned int)size;
  if (buffer->size < wanted) {
    // Keep doubling until the request fits.
    while (buffer->size < wanted) {
      buffer->size *= 2;
    }

    if (buffer->str != &buffer->bulk[0]) {
      // Storage is already separate from bulk: grow it in place.
      buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size);
      if (buffer->str == NULL) {
        KMP_FATAL(MemoryAllocFailed);
      }
    } else {
      // First growth: move the contents (terminating zero included) out of
      // the inline bulk array.
      buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
      if (buffer->str == NULL) {
        KMP_FATAL(MemoryAllocFailed);
      }
      KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
    }
  }

  KMP_DEBUG_ASSERT(buffer->size > 0);
  KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size);
  KMP_STR_BUF_INVARIANT(buffer);
}
// Detaches the buffer's contents so that buffer->str can be handed to the
// caller. If the capacity does not exceed sizeof(bulk), a block of
// buffer->size bytes is allocated and the bulk contents (terminating zero
// included) are copied into it; larger buffers are left untouched.
void __kmp_str_buf_detach(kmp_str_buf_t *buffer) {
  KMP_STR_BUF_INVARIANT(buffer);

  if (buffer->size > sizeof(buffer->bulk)) {
    return;
  }

  buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
  if (buffer->str == NULL) {
    KMP_FATAL(MemoryAllocFailed);
  }
  KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
}
// Releases separately allocated storage (if the buffer has grown beyond the
// inline bulk array) and returns the buffer to its initial layout: str points
// at bulk, size is sizeof(bulk), used is zero.
void __kmp_str_buf_free(kmp_str_buf_t *buffer) {
  KMP_STR_BUF_INVARIANT(buffer);

  // Only a buffer larger than bulk owns a separate block.
  if (buffer->size > sizeof(buffer->bulk)) {
    KMP_INTERNAL_FREE(buffer->str);
  }

  buffer->used = 0;
  buffer->size = sizeof(buffer->bulk);
  buffer->str = buffer->bulk;

  KMP_STR_BUF_INVARIANT(buffer);
}
// Appends up to `len` characters of `str` to the buffer, growing the buffer
// first so that the result and its terminating zero fit. buffer->used is
// advanced by `len`.
void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, size_t len) {
  KMP_STR_BUF_INVARIANT(buffer);
  KMP_DEBUG_ASSERT(str != NULL);
  KMP_DEBUG_ASSERT(len >= 0);

  __kmp_str_buf_reserve(buffer, buffer->used + len + 1);

  // Terminate at the current end so the concatenation starts from an empty
  // string located exactly there.
  char *tail = buffer->str + buffer->used;
  *tail = '\0';
  KMP_STRNCAT_S(tail, len + 1, str, len);

  __kmp_type_convert(buffer->used + len, &(buffer->used));
  KMP_STR_BUF_INVARIANT(buffer);
}
// Appends the contents of `src` to `dest`. Nothing happens when `src` has no
// string or is empty.
void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) {
  KMP_DEBUG_ASSERT(dest);
  KMP_DEBUG_ASSERT(src);
  KMP_STR_BUF_INVARIANT(dest);
  KMP_STR_BUF_INVARIANT(src);

  if (src->str == NULL || src->used == 0) {
    return;
  }

  __kmp_str_buf_reserve(dest, dest->used + src->used + 1);

  // dest->str is read only after the reserve call, which may have replaced it.
  char *tail = dest->str + dest->used;
  *tail = '\0';
  KMP_STRNCAT_S(tail, src->used + 1, src->str, src->used);
  dest->used += src->used;

  KMP_STR_BUF_INVARIANT(dest);
}
/* Appends formatted output (printf-style `format` plus `args`) to the buffer,
   growing the buffer until the whole result fits. Returns the value of the
   last KMP_VSNPRINTF call, which on the successful exit path is non-negative
   and smaller than the space that was available.
   NOTE(review): KMP_VSNPRINTF is presumably a vsnprintf() wrapper -- confirm
   its return-value convention on each platform. */
int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
                         va_list args) {
  int rc;
  KMP_STR_BUF_INVARIANT(buffer);

  for (;;) {
    // Space left behind the current contents.
    int const avail = buffer->size - buffer->used;
    int size;

    /* vsnprintf() crashes if it is called for the second time with the same
       args. To prevent the crash, we have to pass a fresh intact copy of args
       to vsnprintf() on each iteration.

       Unfortunately, standard va_copy() macro is not available on Windows*
       OS. However, it seems vsnprintf() does not modify args argument on
       Windows* OS. */
#if !KMP_OS_WINDOWS
    {
      va_list args_copy;
      va_copy(args_copy, args);
      rc = KMP_VSNPRINTF(buffer->str + buffer->used, avail, format, args_copy);
      va_end(args_copy);
    }
#else
    rc = KMP_VSNPRINTF(buffer->str + buffer->used, avail, format, args);
#endif

    // Everything fit, terminating zero included: done.
    if (rc >= 0 && rc < avail) {
      buffer->used += rc;
      break;
    }

    // Otherwise grow: to the exact size when the formatter reported the
    // needed length, or by doubling when it only signalled failure.
    if (rc >= 0) {
      size = buffer->used + rc + 1;
    } else {
      size = buffer->size * 2;
    }
    __kmp_str_buf_reserve(buffer, size);
  }

  KMP_DEBUG_ASSERT(buffer->size > 0);
  KMP_STR_BUF_INVARIANT(buffer);
  return rc;
}
// Variadic front end for __kmp_str_buf_vprint(): appends printf-style
// formatted output to the buffer and returns that function's result.
int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
  va_list ap;
  va_start(ap, format);
  int const written = __kmp_str_buf_vprint(buffer, format, ap);
  va_end(ap);
  return written;
}
/* Prints the given size to the buffer, expressed using the biggest
possible unit, for example 1024 is printed as "1k". */
// Appends `size` to the buffer, scaled down by 1024 for as long as it divides
// evenly, followed by the matching unit suffix ("", "k", "M", ...). Zero is
// printed without a suffix.
void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) {
  char const *suffixes[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
  int const count = sizeof(suffixes) / sizeof(char const *);
  int idx = 0;

  // Stop at the first remainder or when the largest suffix is reached.
  for (; size > 0 && size % 1024 == 0 && idx + 1 < count; ++idx) {
    size = size / 1024;
  }

  __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, suffixes[idx]);
}
// Splits `path` into freshly allocated copies stored in `fname`:
//   path - the whole path (backslashes turned into '/' when KMP_OS_WINDOWS),
//   dir  - everything up to and including the last '/',
//   base - everything after it.
// All three fields are NULL when `path` is NULL.
void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
  fname->path = NULL;
  fname->dir = NULL;
  fname->base = NULL;
  if (path == NULL) {
    return;
  }

  fname->path = __kmp_str_format("%s", path);
  if (KMP_OS_WINDOWS) {
    __kmp_str_replace(fname->path, '\\', '/');
  }

  // dir starts out as a copy of the whole path and is truncated below.
  fname->dir = __kmp_str_format("%s", fname->path);
  char *last_sep = strrchr(fname->dir, '/');
  if (KMP_OS_WINDOWS && last_sep == NULL) {
    // No slash at all: a leading "<letter>:" acts as the directory part.
    char drive = (char)TOLOWER(fname->dir[0]);
    if ('a' <= drive && drive <= 'z' && fname->dir[1] == ':') {
      last_sep = &fname->dir[1];
    }
  }

  char *tail = fname->dir;
  if (last_sep != NULL) {
    tail = last_sep + 1;
  }
  fname->base = __kmp_str_format("%s", tail); // Copy the base name...
  *tail = 0; // ...and cut it off dir.
}
// Releases the three strings held by `fname` through __kmp_str_free(), which
// also resets each field to NULL.
void __kmp_str_fname_free(kmp_str_fname_t *fname) {
  char **fields[] = {&fname->path, &fname->dir, &fname->base};
  for (int k = 0; k < 3; ++k) {
    __kmp_str_free(fields[k]);
  }
}
// Checks whether `fname` matches `pattern`. The pattern is split the same
// way as a file name; a directory part of "*/" and a base part of "*" match
// anything, other parts are compared with __kmp_str_eqf(). A NULL pattern
// matches everything. Returns nonzero on match.
int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) {
  if (pattern == NULL) {
    return 1;
  }

  kmp_str_fname_t ptrn;
  __kmp_str_fname_init(&ptrn, pattern);

  int dir_ok = 1;
  if (strcmp(ptrn.dir, "*/") != 0) {
    dir_ok = (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir));
  }

  int base_ok = 1;
  if (strcmp(ptrn.base, "*") != 0) {
    base_ok = (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base));
  }

  __kmp_str_fname_free(&ptrn);
  return dir_ok && base_ok;
}
// Extracts two numbers from a ';'-separated location string: the number that
// follows the third ';' goes to *LineBeg and the number that follows the
// fourth ';' goes to *LineEndOrCol. A number whose separator is missing is
// reported as 0.
void __kmp_str_loc_numbers(char const *Psource, int *LineBeg,
                           int *LineEndOrCol) {
  char *cursor;
  KMP_DEBUG_ASSERT(LineBeg);
  KMP_DEBUG_ASSERT(LineEndOrCol);
  KMP_DEBUG_ASSERT(Psource);

#ifdef __cplusplus
  cursor = strchr(CCAST(char *, Psource), ';');
#else
  cursor = strchr(Psource, ';');
#endif

  // Advance from the first separator to the third one.
  for (int hops = 0; hops < 2 && cursor; ++hops) {
    cursor = strchr(cursor + 1, ';');
  }

  if (cursor) {
    *LineBeg = atoi(cursor + 1);
    cursor = strchr(cursor + 1, ';');
  } else {
    *LineBeg = 0;
  }

  *LineEndOrCol = cursor ? atoi(cursor + 1) : 0;
}
// Builds a kmp_str_loc_t from a ';'-separated source string. The string is
// copied into loc._bulk and cut in place: the first field is discarded, the
// next two become loc.file and loc.func, and the following two are parsed as
// line and column (negative values are clamped to 0). loc.fname is
// initialised from loc.file when init_fname is true, from NULL otherwise.
kmp_str_loc_t __kmp_str_loc_init(char const *psource, bool init_fname) {
  kmp_str_loc_t loc;

  loc._bulk = NULL;
  loc.file = NULL;
  loc.func = NULL;
  loc.line = 0;
  loc.col = 0;

  if (psource != NULL) {
    char *rest = NULL;
    char *ignored = NULL;
    char *line_text = NULL;
    char *col_text = NULL;

    // Work on a private copy, since splitting overwrites the separators.
    loc._bulk = __kmp_str_format("%s", psource);
    rest = loc._bulk;

    __kmp_str_split(rest, ';', &ignored, &rest);
    __kmp_str_split(rest, ';', &loc.file, &rest);
    __kmp_str_split(rest, ';', &loc.func, &rest);
    __kmp_str_split(rest, ';', &line_text, &rest);
    __kmp_str_split(rest, ';', &col_text, &rest);

    if (line_text != NULL) {
      loc.line = atoi(line_text);
      if (loc.line < 0) {
        loc.line = 0;
      }
    }
    if (col_text != NULL) {
      loc.col = atoi(col_text);
      if (loc.col < 0) {
        loc.col = 0;
      }
    }
  }

  if (init_fname) {
    __kmp_str_fname_init(&loc.fname, loc.file);
  } else {
    __kmp_str_fname_init(&loc.fname, NULL);
  }
  return loc;
}
// Releases everything owned by `loc`: the split file name and the backing
// string _bulk. The file and func pointers are only cleared, not freed.
void __kmp_str_loc_free(kmp_str_loc_t *loc) {
  __kmp_str_fname_free(&loc->fname);
  __kmp_str_free(&(loc->_bulk));
  loc->func = NULL;
  loc->file = NULL;
}
/* This function is intended to compare file names. On Windows* OS file names
are case-insensitive, so the function performs a case-insensitive comparison.
On other OSes it performs a case-sensitive comparison. Note: The function
returns *true* if strings are *equal*. */
// Returns 1 when the two strings are equal and 0 otherwise. The comparison
// ignores case when KMP_OS_WINDOWS is set and is case-sensitive otherwise.
int __kmp_str_eqf(char const *lhs, char const *rhs) {
#if KMP_OS_WINDOWS
  return _stricmp(lhs, rhs) == 0;
#else
  return strcmp(lhs, rhs) == 0;
#endif
}
/* This function works like sprintf(), but it allocates a new buffer, which must be
freed eventually by __kmp_str_free(). The function is very convenient for
constructing strings, it successfully replaces strdup(), strcat(), it frees
programmer from buffer allocations and helps to avoid buffer overflows.
Examples:
str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size
__kmp_str_free( & str );
str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care
// about buffer size.
__kmp_str_free( & str );
str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
__kmp_str_free( & str );
Performance note:
This function allocates memory with malloc() calls, so do not call it from
performance-critical code. In performance-critical code consider using
kmp_str_buf_t instead, since it uses stack-allocated buffer for short
strings.
Why does this function use malloc()?
1. __kmp_allocate() returns cache-aligned memory allocated with malloc().
There are no reasons in using __kmp_allocate() for strings due to extra
overhead while cache-aligned memory is not necessary.
2. __kmp_thread_malloc() cannot be used because it requires pointer to thread
structure. We need to perform string operations during library startup
(for example, in __kmp_register_library_startup()) when no thread
structures are allocated yet.
So standard malloc() is the only available option.
*/
// Formats the arguments printf-style into a newly allocated string and
// returns it. The first attempt uses 512 bytes; when that is not enough the
// block is reallocated (to the exact size if the formatter reported the
// needed length, doubled otherwise) and formatting is retried. Allocation
// failure is reported through KMP_FATAL(MemoryAllocFailed).
char *__kmp_str_format(char const *format, ...) {
  int capacity = 512;
  char *text = (char *)KMP_INTERNAL_MALLOC(capacity);
  if (text == NULL) {
    KMP_FATAL(MemoryAllocFailed);
  }

  for (;;) {
    // The argument list is restarted for every attempt.
    va_list ap;
    va_start(ap, format);
    int const rc = KMP_VSNPRINTF(text, capacity, format, ap);
    va_end(ap);

    if (rc >= 0 && rc < capacity) {
      return text;
    }

    capacity = (rc >= 0) ? rc + 1 : capacity * 2;
    text = (char *)KMP_INTERNAL_REALLOC(text, capacity);
    if (text == NULL) {
      KMP_FATAL(MemoryAllocFailed);
    }
  }
}
// Frees the string that *str points to and sets *str to NULL.
void __kmp_str_free(char **str) {
  KMP_DEBUG_ASSERT(str != NULL);
  char *doomed = *str;
  *str = NULL;
  KMP_INTERNAL_FREE(doomed);
}
/* If len is zero, returns true iff target and data match exactly, ignoring
case. If len is negative, returns true iff target is a case-insensitive
prefix of data. If len is positive, returns true iff one of target and data is
a case-insensitive prefix of the other, and neither is shorter than len. */
// Case-insensitive prefix comparison of `target` and `data`; `len` selects
// the rule applied after the common part:
//   len > 0  - at least `len` characters must have matched;
//   len == 0 - both strings must end together (exact match);
//   len < 0  - `target` must be exhausted (it is a prefix of `data`).
// Returns FALSE when either pointer is NULL.
int __kmp_str_match(char const *target, int len, char const *data) {
  if (target == NULL || data == NULL) {
    return FALSE;
  }

  // Walk both strings in step until one of them ends.
  int matched = 0;
  while (target[matched] && data[matched]) {
    if (TOLOWER(target[matched]) != TOLOWER(data[matched])) {
      return FALSE;
    }
    ++matched;
  }

  if (len > 0) {
    return matched >= len;
  }
  if (target[matched]) {
    return 0;
  }
  return len || !data[matched];
}
// Case-insensitive search for `target` inside `data`. Returns true when all
// of `target` was found and the number of matched characters equals `len`,
// so `len` is expected to be the length of `target`. Returns FALSE when
// either pointer is NULL or `data` ends before a full match.
bool __kmp_str_contains(char const *target, int len, char const *data) {
  if (target == NULL || data == NULL) {
    return FALSE;
  }

  int matched = 0; // characters of target matched in the current attempt
  int pos = 0; // current index into data
  int anchor = 0; // index in data where the current attempt started

  while (target[matched]) {
    if (!data[pos]) {
      return FALSE;
    }
    if (TOLOWER(target[matched]) == TOLOWER(data[pos])) {
      if (matched == 0) {
        anchor = pos;
      }
      ++pos;
      ++matched;
      continue;
    }
    // Mismatch: retry one character past where this attempt began.
    anchor = anchor + 1;
    pos = anchor;
    matched = 0;
  }
  return matched == len;
}
// Returns 1 when `data` is one of the accepted spellings of "false" as judged
// by __kmp_str_match(), 0 otherwise. Each entry pairs a word with the `len`
// argument passed along with it.
int __kmp_str_match_false(char const *data) {
  static const struct {
    char const *word;
    int len;
  } spellings[] = {{"false", 1}, {"off", 2}, {"0", 1},       {".false.", 2},
                   {".f.", 2},   {"no", 1},  {"disabled", 0}};
  int const count = sizeof(spellings) / sizeof(spellings[0]);

  for (int k = 0; k < count; ++k) {
    if (__kmp_str_match(spellings[k].word, spellings[k].len, data)) {
      return 1;
    }
  }
  return 0;
}
// Returns 1 when `data` is one of the accepted spellings of "true" as judged
// by __kmp_str_match(), 0 otherwise. Each entry pairs a word with the `len`
// argument passed along with it.
int __kmp_str_match_true(char const *data) {
  static const struct {
    char const *word;
    int len;
  } spellings[] = {{"true", 1}, {"on", 2},  {"1", 1},      {".true.", 2},
                   {".t.", 2},  {"yes", 1}, {"enabled", 0}};
  int const count = sizeof(spellings) / sizeof(spellings[0]);

  for (int k = 0; k < count; ++k) {
    if (__kmp_str_match(spellings[k].word, spellings[k].len, data)) {
      return 1;
    }
  }
  return 0;
}
// Replaces, in place, every occurrence of `search_for` in `str` with
// `replace_with`.
void __kmp_str_replace(char *str, char search_for, char replace_with) {
  for (char *hit = strchr(str, search_for); hit;
       hit = strchr(hit + 1, search_for)) {
    *hit = replace_with;
  }
}
// Cuts `str` in place at the first `delim`: the delimiter is overwritten with
// a terminating zero, *head receives `str` and *tail receives the text after
// the delimiter (NULL when there is no delimiter or `str` is NULL). Either
// output pointer may be NULL when the caller does not need that part.
void __kmp_str_split(char *str, char delim, char **head, char **tail) {
  char *rest = NULL;
  char *sep = (str != NULL) ? strchr(str, delim) : NULL;

  if (sep != NULL) {
    *sep = 0;
    rest = sep + 1;
  }

  if (head != NULL) {
    *head = str;
  }
  if (tail != NULL) {
    *tail = rest;
  }
}
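/* Splits a string into tokens like strtok_r(). strtok_r() is not available on
Windows* OS, so on that platform this function reimplements
strtok_r(). */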
// Returns the next token of a string, where tokens are separated by any of
// the characters in `delim`. Pass the string on the first call and NULL on
// the following ones; *buf keeps the scan position between calls. Returns
// NULL when no token is left. Uses strtok_r() unless KMP_OS_WINDOWS is set,
// in which case the same scan is done by hand with strspn()/strcspn().
char *__kmp_str_token(char *str, char const *delim, char **buf) {
#if KMP_OS_WINDOWS
  char *cursor = (str != NULL) ? str : *buf;

  // Skip leading delimiters.
  cursor += strspn(cursor, delim);
  if (*cursor == 0) {
    *buf = cursor;
    return NULL;
  }

  // Find the end of the token and terminate it.
  char *token = cursor;
  cursor += strcspn(cursor, delim);
  if (*cursor != 0) {
    *cursor = 0;
    cursor += 1;
  }
  *buf = cursor;
  return token;
#else
  return strtok_r(str, delim, buf);
#endif
}
// Converts the leading run of decimal digits in `str` to an int. Parsing
// stops at the first non-digit character; a string without leading digits
// yields 0. A value that does not fit in an int is reported as INT_MAX
// instead of overflowing (signed overflow is undefined behavior).
int __kmp_basic_str_to_int(char const *str) {
  int result = 0;
  char const *t;

  for (t = str; *t >= '0' && *t <= '9'; ++t) {
    int const digit = *t - '0';
    // result * 10 + digit would exceed INT_MAX: saturate.
    if (result > (INT_MAX - digit) / 10) {
      return INT_MAX;
    }
    result = (result * 10) + digit;
  }
  return result;
}
// Parses a non-negative decimal number with an optional one-letter unit:
// 'b'/'B' (x1), 'k'/'K' (x1024) or 'm'/'M' (x1024*1024). A character equal
// to `sentinel` right after the digits ends the number and the rest of the
// string is ignored.
// Returns:
//   the scaled value, saturated at INT_MAX when it does not fit in an int;
//   -1 when the digits are followed by a character that is neither a unit
//      nor the sentinel;
//    0 when anything follows a unit letter.
// The digit accumulation saturates as well, so over-long inputs no longer
// cause signed integer overflow.
int __kmp_str_to_int(char const *str, char sentinel) {
  int result = 0;
  int factor;
  char const *t;

  for (t = str; *t >= '0' && *t <= '9'; ++t) {
    int const digit = *t - '0';
    if (result > (INT_MAX - digit) / 10) {
      result = INT_MAX; // stays saturated for any further digits
    } else {
      result = (result * 10) + digit;
    }
  }

  switch (*t) {
  case '\0': // no suffix: plain number
    factor = 1;
    break;
  case 'b':
  case 'B':
    ++t;
    factor = 1;
    break;
  case 'k':
  case 'K':
    ++t;
    factor = 1024;
    break;
  case 'm':
  case 'M':
    ++t;
    factor = (1024 * 1024);
    break;
  default:
    if (*t != sentinel)
      return (-1);
    t = ""; // the sentinel ends the number; ignore what follows
    factor = 1;
    break;
  }

  if (result > (INT_MAX / factor))
    result = INT_MAX;
  else
    result *= factor;

  return (*t != 0 ? 0 : result);
}
/* The routine parses the input string, which is expected to be an unsigned
integer with an optional unit. Units are: "b" for bytes, "kb" or just "k" for
kilobytes, "mb" or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-)
Unit name is case-insensitive. If no unit is given, the value is multiplied by
dfactor. On success *error is set to NULL and *out to the parsed value. On
failure *error is set to an error message: in case of overflow *out is set to
KMP_SIZE_T_MAX, for any other error *out is left unchanged. */
// Parses `str` as an unsigned decimal number with an optional unit suffix.
//   str     - input text; blanks and tabs are allowed before the number,
//             between the number and the unit, and at the end.
//   out     - receives the parsed value on success and KMP_SIZE_T_MAX on
//             overflow; not written on the other error paths.
//   dfactor - multiplier used when the text carries no unit.
//   error   - set to NULL on success, otherwise to a KMP_I18N_STR() message
//             (NotANumber, BadUnit, IllegalCharacters or ValueTooLarge).
void __kmp_str_to_size(
char const *str,
size_t *out,
size_t dfactor,
char const **error
) {
size_t value = 0;
size_t factor = 0;
int overflow = 0;
int i = 0;
int digit;
KMP_DEBUG_ASSERT(str != NULL);
// Skip leading white space.
while (str[i] == ' ' || str[i] == '\t') {
++i;
}
// The number must start with a digit.
if (str[i] < '0' || str[i] > '9') {
*error = KMP_I18N_STR(NotANumber);
return;
}
// Accumulate the digits. Overflow is only recorded here; it is reported
// after the rest of the string has been validated.
do {
digit = str[i] - '0';
overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
value = (value * 10) + digit;
++i;
} while (str[i] >= '0' && str[i] <= '9');
// Skip white space between the number and the unit.
while (str[i] == ' ' || str[i] == '\t') {
++i;
}
// _case(ch, exp) expands to the case labels for a unit letter in lower and
// upper case. It consumes the letter and sets factor to 2^(10*exp), or
// flags overflow when that shift does not fit in size_t.
#define _case(ch, exp) \
case ch: \
case ch - ('a' - 'A'): { \
size_t shift = (exp)*10; \
++i; \
if (shift < sizeof(size_t) * 8) { \
factor = (size_t)(1) << shift; \
} else { \
overflow = 1; \
} \
} break;
switch (str[i]) {
_case('k', 1);
_case('m', 2);
_case('g', 3);
_case('t', 4);
_case('p', 5);
_case('e', 6);
_case('z', 7);
_case('y', 8);
}
#undef _case
// Optional trailing 'b'. On its own (no unit letter before it) it means
// bytes, i.e. a factor of 1.
if (str[i] == 'b' || str[i] == 'B') {
if (factor == 0) {
factor = 1;
}
++i;
}
// The unit must be followed by white space or the end of the string.
if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) {
*error = KMP_I18N_STR(BadUnit);
return;
}
// No unit given: fall back to the caller's default factor.
// NOTE(review): a dfactor of 0 would make the division below divide by
// zero -- confirm callers never pass 0.
if (factor == 0) {
factor = dfactor;
}
overflow = overflow || (value > (KMP_SIZE_T_MAX / factor));
value *= factor;
// Skip trailing white space; anything else after it is an error.
while (str[i] == ' ' || str[i] == '\t') {
++i;
}
if (str[i] != 0) {
*error = KMP_I18N_STR(IllegalCharacters);
return;
}
if (overflow) {
*error = KMP_I18N_STR(ValueTooLarge);
*out = KMP_SIZE_T_MAX;
return;
}
*error = NULL;
*out = value;
}
void __kmp_str_to_uint(
char const *str,
kmp_uint64 *out,
char const **error
) {
size_t value = 0;
int overflow = 0;
int i = 0;
int digit;
KMP_DEBUG_ASSERT(str != NULL);
while (str[i] == ' ' || str[i] == '\t') {
++i;
}
if (str[i] < '0' || str[i] > '9') {
*error = KMP_I18N_STR(NotANumber);
return;
}
do {
digit = str[i] - '0';
overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
value = (value * 10) + digit;
++i;
} while (str[i] >= '0' && str[i] <= '9');
while (str[i] == ' ' || str[i] == '\t') {
++i;
}
if (str[i] != 0) {
*error = KMP_I18N_STR(IllegalCharacters);
return;
}
if (overflow) {
*error = KMP_I18N_STR(ValueTooLarge);
*out = (kmp_uint64)-1;
return;
}
*error = NULL;
*out = value;
}