* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* This source file is part of the Cangjie project, licensed under Apache-2.0
* with Runtime Library Exception.
*
* See https://cangjie-lang.cn/pages/LICENSE for license information.
*/
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <limits.h>
#define PCRE2_STATIC
#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
#endif
#include "pcre2.h"
/* Size in bytes of the buffer CJ_REGEX_GetErrorMsg allocates for a PCRE2 error message.
 * The #ifndef guard lets a build override the default. */
#ifndef ERR_MSG_LEN
#define ERR_MSG_LEN (256)
#endif
/* Value handed to pcre2_set_match_limit() for the match contexts built in this file. */
#define CJ_REGEX_MATCH_LIMIT (10000000)
/* Value handed to pcre2_set_recursion_limit() for the match contexts built in this file. */
#define CJ_REGEX_RECURSION_LIMIT (1000000)
/* Outcome of CJ_REGEX_Compile: the three values produced by pcre2_compile(). */
typedef struct {
/* Compiled pattern as returned by pcre2_compile(). */
pcre2_code* re;
/* Error code written by pcre2_compile(); CJ_REGEX_Compile seeds it with INT_MIN before the call. */
int errorCode;
/* Error offset written by pcre2_compile(). */
PCRE2_SIZE errorOffset;
} CompileResult;
/* Named-group table description filled in by CJ_REGEX_GetNameTableInfo. */
typedef struct {
/* Value of PCRE2_INFO_NAMECOUNT for the pattern. */
uint32_t nameCount;
/* Value of PCRE2_INFO_NAMEENTRYSIZE for the pattern. */
uint32_t nameEntrySize;
/* Value of PCRE2_INFO_NAMETABLE for the pattern.
 * NOTE(review): presumably points into the compiled pattern and must not be freed separately - confirm. */
uint8_t* nameTable;
} NamedTableInfo;
/*
 * Compile a zero-terminated pattern with pcre2_compile() and package the outcome.
 *
 * pattern: pattern text; its length is passed as PCRE2_ZERO_TERMINATED.
 * options: option bits forwarded unchanged to pcre2_compile().
 *
 * Returns a malloc()-allocated CompileResult holding the compiled code pointer,
 * the error code and the error offset, or NULL when that allocation fails.
 * NOTE(review): the caller is presumably responsible for releasing the result - confirm.
 */
extern CompileResult* CJ_REGEX_Compile(const unsigned char* pattern, const uint32_t options)
{
    CompileResult* out = (CompileResult*)malloc(sizeof(CompileResult));
    if (out != NULL) {
        /* INT_MIN acts as a sentinel in case pcre2_compile() never stores a code. */
        int code = INT_MIN;
        PCRE2_SIZE where;
        out->re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, options, &code, &where, NULL);
        out->errorCode = code;
        out->errorOffset = where;
    }
    return out;
}
/*
 * Create a match-data block for the given compiled pattern.
 *
 * Forwards to pcre2_match_data_create_from_pattern() with a NULL general context
 * and returns whatever that call returns.
 */
extern pcre2_match_data* CJ_REGEX_CreateMatchData(const pcre2_code* re)
{
    pcre2_match_data* created = pcre2_match_data_create_from_pattern(re, NULL);
    return created;
}
/*
 * Run a single pcre2_match() under this file's match and recursion limits.
 *
 * re:        compiled pattern.
 * subject:   subject buffer.
 * length:    subject length, forwarded to pcre2_match().
 * offset:    start offset, forwarded to pcre2_match().
 * matchData: match-data block that receives the result.
 *
 * Returns the pcre2_match() return value widened to int64_t, or
 * PCRE2_ERROR_NOMEMORY when the temporary match context cannot be created.
 */
extern int64_t CJ_REGEX_Match(const pcre2_code* re, const unsigned char* subject, const PCRE2_SIZE length,
    const PCRE2_SIZE offset, pcre2_match_data* matchData)
{
    int64_t status = PCRE2_ERROR_NOMEMORY;
    pcre2_match_context* ctx = pcre2_match_context_create(NULL);

    if (ctx != NULL) {
        pcre2_set_match_limit(ctx, CJ_REGEX_MATCH_LIMIT);
        pcre2_set_recursion_limit(ctx, CJ_REGEX_RECURSION_LIMIT);
        status = (int64_t)pcre2_match(re, subject, length, offset, 0, matchData, ctx);
        /* The context is only needed for the duration of this one call. */
        pcre2_match_context_free(ctx);
    }
    return status;
}
/*
 * Return the offset vector of a match-data block, exactly as
 * pcre2_get_ovector_pointer() reports it.
 */
extern PCRE2_SIZE* CJ_REGEX_GetOvector(pcre2_match_data* matchData)
{
    PCRE2_SIZE* offsets = pcre2_get_ovector_pointer(matchData);
    return offsets;
}
/*
 * Produce the PCRE2 error text for an error code.
 *
 * errorCode: a PCRE2 compile or match error code.
 *
 * Returns a malloc()-allocated buffer of ERR_MSG_LEN bytes holding a
 * NUL-terminated message, or NULL when the allocation fails. When PCRE2 has
 * no text for the code, the buffer holds an empty string.
 * NOTE(review): the caller is presumably responsible for freeing the buffer - confirm.
 */
extern unsigned char* CJ_REGEX_GetErrorMsg(const int errorCode)
{
    unsigned char* msg = (uint8_t*)malloc(ERR_MSG_LEN);
    if (msg == NULL) {
        return NULL;
    }
    /*
     * pcre2_get_error_message() can fail (PCRE2_ERROR_BADDATA for an unknown
     * code) without storing anything in the buffer. Terminate it up front so
     * the caller never reads indeterminate heap bytes as a string.
     */
    msg[0] = '\0';
    (void)pcre2_get_error_message(errorCode, msg, ERR_MSG_LEN);
    return msg;
}
/*
 * Query PCRE2_INFO_NAMECOUNT for a compiled pattern.
 *
 * Returns the value stored by pcre2_pattern_info(), or 0 when the query fails
 * (the result of the call itself is deliberately not propagated).
 */
static uint32_t CJ_REGEX_GetNameCount(const pcre2_code* re)
{
    /* Start from 0 so a failed query cannot leak an indeterminate value. */
    uint32_t count = 0;
    if (pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &count) != 0) {
        return 0;
    }
    return count;
}
/*
 * Query PCRE2_INFO_NAMEENTRYSIZE for a compiled pattern.
 *
 * Returns the value stored by pcre2_pattern_info(), or 0 when the query fails
 * (the result of the call itself is deliberately not propagated).
 */
static uint32_t CJ_REGEX_GetNameEntrySize(const pcre2_code* re)
{
    /* Start from 0 so a failed query cannot leak an indeterminate value. */
    uint32_t size = 0;
    if (pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &size) != 0) {
        return 0;
    }
    return size;
}
/*
 * Query PCRE2_INFO_NAMETABLE for a compiled pattern.
 *
 * Returns the table pointer stored by pcre2_pattern_info(), or NULL when the
 * query fails (the result of the call itself is deliberately not propagated).
 */
static uint8_t* CJ_REGEX_GetNameTable(const pcre2_code* re)
{
    /* Start from NULL so a failed query cannot hand back a wild pointer. */
    PCRE2_SPTR table = NULL;
    if (pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &table) != 0) {
        return NULL;
    }
    return (uint8_t*)table;
}
/*
 * Collect the named-group table description of a compiled pattern.
 *
 * Returns a malloc()-allocated NamedTableInfo whose fields are filled from
 * CJ_REGEX_GetNameCount, CJ_REGEX_GetNameEntrySize and CJ_REGEX_GetNameTable,
 * or NULL when the allocation fails.
 * NOTE(review): the caller is presumably responsible for freeing the struct - confirm.
 */
extern NamedTableInfo* CJ_REGEX_GetNameTableInfo(const pcre2_code* re)
{
    NamedTableInfo* summary = (NamedTableInfo*)malloc(sizeof(NamedTableInfo));
    if (summary != NULL) {
        summary->nameCount = CJ_REGEX_GetNameCount(re);
        summary->nameEntrySize = CJ_REGEX_GetNameEntrySize(re);
        summary->nameTable = CJ_REGEX_GetNameTable(re);
    }
    return summary;
}
/*
 * Release a compiled pattern by handing it to pcre2_code_free().
 */
extern void CJ_REGEX_FreeCode(pcre2_code* re)
{
    pcre2_code_free(re);
}
/*
 * Release a match-data block by handing it to pcre2_match_data_free().
 */
extern void CJ_REGEX_FreeMatchData(pcre2_match_data* md)
{
    pcre2_match_data_free(md);
}
extern int64_t CJ_REGEX_Count(const pcre2_code* re, const unsigned char* subject, const PCRE2_SIZE length,
const PCRE2_SIZE offset, const PCRE2_SIZE end, pcre2_match_data* matchData)
{
pcre2_match_context* mcontext = pcre2_match_context_create(NULL);
if (mcontext == NULL) {
return PCRE2_ERROR_NOMEMORY;
}
pcre2_set_match_limit(mcontext, CJ_REGEX_MATCH_LIMIT);
pcre2_set_recursion_limit(mcontext, CJ_REGEX_RECURSION_LIMIT);
PCRE2_SIZE startOffset = offset;
int64_t count = 0;
int matchResult = 0;
while (startOffset <= end &&
(matchResult = pcre2_match(re, subject, length, startOffset, 0, matchData, mcontext)) > 0) {
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(matchData);
startOffset = ovector[1] + (startOffset == ovector[1]);
count++;
}
pcre2_match_context_free(mcontext);
if (matchResult == PCRE2_ERROR_MATCHLIMIT) {
return PCRE2_ERROR_MATCHLIMIT;
}
if (matchResult == PCRE2_ERROR_RECURSIONLIMIT) {
return PCRE2_ERROR_RECURSIONLIMIT;
}
return count;
}