d503e088创建于 2025年12月11日历史提交
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File line.cpp
*
* Modification History:
*
*   Date        Name        Description
*   03/18/2003  weiv        Creation.
*******************************************************************************
*/

#include "line.h"
#include <stdio.h>

UnicodeSet * Line::needsQuoting = nullptr;

void
Line::init()
{
        len = 0;
        expLen = 0;
        strength = UCOL_OFF;
        strengthFromEmpty = UCOL_OFF;
        cumulativeStrength = UCOL_OFF;
        expStrength = UCOL_OFF;
        previous = nullptr;
        next = nullptr;
        left = nullptr;
        right = nullptr;
        isContraction = false;
        isExpansion = false;
        isRemoved = false;
        isReset = false;
        expIndex = 0;
        firstCC = 0;
        lastCC = 0;
        sortKey = nullptr;
}

Line::Line()
{
  init();
  memset(name, 0, 25*sizeof(char16_t));
  memset(expansionString, 0, 25*sizeof(char16_t));
}

Line::Line(const char16_t* name, int32_t len)
{
  init();
  this->len = len;
  u_memcpy(this->name, name, len);
  memset(expansionString, 0, 25*sizeof(char16_t));
  UChar32 c;
  U16_GET(name, 0, 0, len, c);
  firstCC = u_getCombiningClass(c);
  U16_GET(name, 0, len-1, len, c);
  lastCC = u_getCombiningClass(c);
}

Line::Line(const char16_t name)
{
    init();
    len = 1;
    this->name[0] = name;
    this->name[1] = 0;
  memset(expansionString, 0, 25*sizeof(char16_t));
  firstCC = u_getCombiningClass(name);
  lastCC = firstCC;
}

Line::Line(const UnicodeString &string)
{
  init();
  setTo(string);
}

Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) :
previous(nullptr),
next(nullptr),
left(nullptr),
right(nullptr)
{
  initFromString(buff, buffLen, status);
}

Line::Line(const Line &other) :
  previous(nullptr),
  next(nullptr),
left(nullptr),
right(nullptr)
{
  *this = other;
}
         
Line &
Line::operator=(const Line &other) {
  len = other.len;
  expLen = other.expLen;
  strength = other.strength;
  strengthFromEmpty = other.strengthFromEmpty;
  cumulativeStrength = other.cumulativeStrength;
  expStrength = other.expStrength;
  isContraction = other.isContraction;
  isExpansion = other.isExpansion;
  isRemoved = other.isRemoved;
  isReset = other.isReset;
  expIndex = other.expIndex;
  firstCC = other.firstCC;
  lastCC = other.lastCC;
  u_strcpy(name, other.name);
  u_strcpy(expansionString, other.expansionString);
  sortKey = other.sortKey;
  left = other.left;
  right = other.right;
  return *this;
}

UBool 
Line::operator==(const Line &other) const {
  if(this == &other) {
    return true;
  }
  if(len != other.len) {
    return false;
  }
  if(u_strcmp(name, other.name) != 0) {
    return false;
  }
  return true;
}

UBool 
Line::equals(const Line &other) const {
  if(this == &other) {
    return true;
  }
  if(len != other.len) {
    return false;
  }
  if(u_strcmp(name, other.name) != 0) {
    return false;
  }
  if(strength != other.strength) {
    return false;
  }
  if(expLen != other.expLen) {
    return false;
  }
  if(u_strcmp(expansionString, other.expansionString)) {
    return false;
  }
  return true;
}

UBool
Line::operator!=(const Line &other) const {
  return !(*this == other);
}


Line::~Line() {
}

void
Line::copyArray(Line *dest, const Line *src, int32_t size) {
  int32_t i = 0;
  for(i = 0; i < size; i++) {
    dest[i] = src[i];
  }
}

void
Line::setName(const char16_t* name, int32_t len) {
  this->len = len;
  u_memcpy(this->name, name, len);
  UChar32 c;
  U16_GET(name, 0, 0, len, c);
  firstCC = u_getCombiningClass(c);
  U16_GET(name, 0, len-1, len, c);
  lastCC = u_getCombiningClass(c);
}

void 
Line::setToConcat(const Line *first, const Line *second) {
  u_strcpy(name, first->name);
  u_strcat(name, second->name);
  len = first->len + second->len;
  firstCC = first->firstCC;
  lastCC = second->lastCC;
}

UnicodeString
Line::stringToName(char16_t *string, int32_t len) {
  UErrorCode status = U_ZERO_ERROR;
  UnicodeString result;
  char buffer[256];
  int32_t i = 0;
  UChar32 c; 
  while(i < len) {
    U16_NEXT(string, i, len, c);
    if(c < 0x10000) {
      sprintf(buffer, "%04X ", c);
    } else {
      sprintf(buffer, "%06X ", c);
    }
    result.append(buffer);
  }
  i = 0;
  while(i < len) {
    U16_NEXT(string, i, len, c);
    u_charName(c, U_EXTENDED_CHAR_NAME, buffer, 256, &status);
    result.append("{");
    result.append(buffer);
    result.append("} ");
  }
/*
  for(i = 0; i < len; i++) {
    sprintf(buffer, "%04X ", string[i]);
    result.append(buffer);
  }
  for(i = 0; i < len; i++) {
    u_charName(string[i], U_EXTENDED_CHAR_NAME, buffer, 256, &status);
    result.append("{");
    result.append(buffer);
    result.append("} ");
  }
*/  
  return result;
}

UnicodeString
Line::toBundleString() 
{

  UnicodeString result;
  UErrorCode status = U_ZERO_ERROR;
  if(!needsQuoting) {
    needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status);
  }
  char16_t NFC[50];
  int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
  result.append("\"");
  if(isReset) {
    result.append("&");
  } else {
    result.append(strengthToString(strength, false, false));
  }
  UBool quote = needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC);
  if(quote) {
    result.append("'");
  }
  if(NFC[0] == 0x22) {
    result.append("\\u0022");
  } else {
    result.append(NFC, NFCLen);
  }
  if(quote && NFC[0] != 0x0027) {
    result.append("'");
  }
  if(expLen && !isReset) {
    quote = needsQuoting->containsSome(expansionString);
    result.append(" / ");
    if(quote) {
      result.append("'");
    }
    result.append(expansionString);
    if(quote) {
      result.append("'");
    }
  }
  result.append("\" //");

  result.append(stringToName(NFC, NFCLen));
  if(expLen && !isReset) {
    result.append(" / ");
    result.append(stringToName(expansionString, expLen));
  }
  result.append("\n");
  return result;
}

UnicodeString
Line::toHTMLString() 
{
  UnicodeString result;
  UErrorCode status = U_ZERO_ERROR;
  char16_t NFC[50];
  int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
  result.append("<span title=\"");
  result.append(stringToName(NFC, NFCLen));
  if(expLen && !isReset) {
    result.append(" / ");
    result.append(stringToName(expansionString, expLen));
  }
  result.append("\">");
  if(isReset) {
    result.append("&amp;");
  } else {
    result.append(strengthToString(strength, false, true));
  }
  result.append(NFC, NFCLen);
  if(expLen && !isReset) {
    result.append("&nbsp;/&nbsp;");
    result.append(expansionString);
  }
  result.append("</span><br>\n");
  return result;
}

UnicodeString
Line::toString(UBool pretty) {
  UnicodeString result;
  if(!pretty) {
    result.setTo(name);
    if(expLen) {
      result.append("/");
      result.append(expansionString);
    }
  } else {
    UErrorCode status = U_ZERO_ERROR;
    char16_t NFC[50];
    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
    result.setTo(NFC, NFCLen);
    if(expLen) {
      result.append("/");
      result.append(expansionString);
    }
    /*
    if(NFCLen != len || u_strncmp(name, NFC, len) != 0) {
      result.append("(NFC: ");
      result.append(NFC, NFCLen);
      result.append(stringToName(NFC, NFCLen));
      result.append(")");
    }
    */
    result.append("    # ");
    result.append(stringToName(NFC, NFCLen));
    if(expLen) {
      result.append("/ ");
      result.append(stringToName(expansionString, expLen));
    }
  }
  return result;
}


void
Line::setTo(const UnicodeString &string) {
  int32_t len = string.length();
  u_strncpy(name, string.getBuffer(), len);
  name[len] = 0;
  this->len = len;
  UChar32 c;
  U16_GET(name, 0, 0, len, c);
  firstCC = u_getCombiningClass(c);
  U16_GET(name, 0, len-1, len, c);
  lastCC = u_getCombiningClass(c);
}

void 
Line::setTo(const UChar32 n) {
  UBool isError = false;
  len = 0; // we are setting the line to char, not appending
  U16_APPEND(name, len, 25, n, isError);
  name[len] = 0;
  firstCC = u_getCombiningClass(n);
  lastCC = firstCC;
}


UnicodeString
Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result) 
{
  int i;
  int numIndents = strength+1;
  if(strength > UCOL_IDENTICAL) {
    return result;
  } else if(strength == UCOL_IDENTICAL) {
    numIndents = 5;
  }
  for(i = 0; i < numIndents*indentSize; i++) {
    result.append(" ");
  }
  return result;
}

UnicodeString 
Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
  UnicodeString result;
  if(html) {
    switch(strength) {
    case UCOL_IDENTICAL:
      result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;=&nbsp;");
      break;
    case UCOL_QUATERNARY:
      result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&lt;&lt;&nbsp;");
      break;
    case UCOL_TERTIARY:
      result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&lt;&nbsp;");
      break;
    case UCOL_SECONDARY:
      result.append("&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&nbsp;");
      break;
    case UCOL_PRIMARY:
      result.append("&nbsp;&nbsp;&lt;&nbsp;");
      break;
    case UCOL_OFF:
      result.append("&nbsp;&gt;?&nbsp;");
      break;
    default:
      result.append("&nbsp;?!&nbsp;");
      break;
    }
  } else {
    switch(strength) {
    case UCOL_IDENTICAL:
      if(pretty) {
        result.append("        ");
      }
      result.append(" = ");
      break;
    case UCOL_QUATERNARY:
      if(pretty) {
        result.append("        ");
      }
      result.append(" <<<< ");
      break;
    case UCOL_TERTIARY:
      //u_fprintf(file, "<3");
      if(pretty) {
        result.append("      ");
      }
      result.append(" <<< ");
      break;
    case UCOL_SECONDARY:
      //u_fprintf(file, "<2");
      if(pretty) {
        result.append("    ");
      }
      result.append(" << ");
      break;
    case UCOL_PRIMARY:
      //u_fprintf(file, "<1");
      if(pretty) {
        result.append("  ");
      }
      result.append(" < ");
      break;
    case UCOL_OFF:
      result.append(" >? ");
      break;
    default:
      result.append(" ?! ");
      break;
    }
  }
  return result;
}

Line *
Line::nextInteresting() {
  Line *result = this->next;
  while(result && result->strength != UCOL_IDENTICAL) {
    result = result->next;
  }
  return result;
}

void
Line::append(const char16_t* n, int32_t length)
{
  u_strncat(name, n, length);
  name[len+length] = 0;
  len += length;
  UChar32 end;
  U16_GET(n, 0, length-1, length, end);
  lastCC = u_getCombiningClass(end);
}

void
Line::append(const char16_t n)
{
  name[len] = n;
  name[len+1] = 0;
  len++;
  lastCC = u_getCombiningClass(n);
}

void
Line::append(const Line &l)
{
  append(l.name, l.len);
  lastCC = l.lastCC;
}

void
Line::clear()
{
  name[0] = 0;
  len = 0;
}

int32_t
Line::write(char *buff, int32_t, UErrorCode &) 
{
  /*
    char16_t  name[25];
    int32_t   len;
    char16_t  expansionString[25];
    int32_t   expLen;

    UColAttributeValue strength;
    UColAttributeValue strengthFromEmpty;
    UColAttributeValue cumulativeStrength;
    UColAttributeValue expStrength;

    Line *previous;
    Line *next;

    UBool   isContraction;
    UBool   isExpansion;
    UBool   isRemoved;
    UBool   isReset;

    int32_t expIndex;
    uint8_t firstCC;
    uint8_t lastCC;
*/
  int32_t resLen = 0;
  int32_t i = 0;
  sprintf(buff+resLen, "%04X", name[0]);
  resLen += 4;
  for(i = 1; i < len; i++) {
    sprintf(buff+resLen, " %04X", name[i]);
    resLen += 5;
  }
  sprintf(buff+resLen, "/");
  resLen += 1;

  i = 0;
  if(expLen) {
    sprintf(buff+resLen, "%04X", expansionString[0]);
    resLen += 4;
    for(i = 1; i < expLen; i++) {
      sprintf(buff+resLen, " %04X", expansionString[i]);
      resLen += 5;
    }
  }
  sprintf(buff+resLen, "; ");
  resLen += 2;

  sprintf(buff+resLen, "%02i ", strength);
  resLen += 3;
  sprintf(buff+resLen, "%02i", strengthFromEmpty);
  resLen += 2;
  sprintf(buff+resLen, "%02i", cumulativeStrength);
  resLen += 2;
  sprintf(buff+resLen, "%02i", expStrength);
  resLen += 2;

  // Various flags. The only interesting ones are isReset and isRemoved. We will not output removed lines
  //sprintf(buff+resLen, "%1i%1i%1i%1i ", isContraction, isExpansion, isRemoved, isReset);
  //resLen += 5;
  sprintf(buff+resLen, "%1i%1i ", isRemoved, isReset);
  resLen += 3;

  // first and last CC
  // can be calculated on reading
  //sprintf(buff+resLen, "%03i %03i ", firstCC, lastCC);
  //resLen += 8;

  sprintf(buff+resLen, "%08X", expIndex);
  resLen += 8;

  buff[resLen] = 0;

  return resLen;
}

void
Line::initFromString(const char *buff, int32_t, UErrorCode &)
{
  int32_t bufIndex = 0;
  int32_t i = 0;

  sscanf(buff+bufIndex, "%04X", &name[i]);
  i++;
  bufIndex += 4;
  while(buff[bufIndex] != '/') {
    sscanf(buff+bufIndex, " %04X", &name[i]);
    i++;
    bufIndex += 5;
  }
  len = i;
  name[len] = 0;
  bufIndex++;

  if(i > 1) {
    isContraction = true;
  } else {
    isContraction = false;
  }

  if(buff[bufIndex] == ';') {
    isExpansion = false;
    bufIndex += 2;
    expansionString[0] = 0;
    expLen = 0;
  } else {
    i = 0;
    sscanf(buff+bufIndex, "%04X", &expansionString[i]);
    i++;
    bufIndex += 4;
    while(buff[bufIndex] != ';') {
      sscanf(buff+bufIndex, " %04X", &expansionString[i]);
      i++;
      bufIndex += 5;
    }
    expLen = i;
    expansionString[expLen] = 0;
    bufIndex += 2;
  }
  sscanf(buff+bufIndex, "%02i ", &strength);
  bufIndex += 3;
  sscanf(buff+bufIndex, "%02i", &strengthFromEmpty);
  bufIndex += 2;
  sscanf(buff+bufIndex, "%02i", &cumulativeStrength);
  bufIndex += 2;
  sscanf(buff+bufIndex, "%02i", &expStrength);
  bufIndex += 2;

  sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset);
  bufIndex += 3;

  sscanf(buff+bufIndex, "%08X", &expIndex);
  bufIndex += 8;

  // calculate first and last CC
  UChar32 c;
  U16_GET(name, 0, 0, len, c);
  firstCC = u_getCombiningClass(c);
  U16_GET(name, 0, len-1, len, c);
  lastCC = u_getCombiningClass(c);
}

void
Line::swapCase(char16_t *string, int32_t &sLen)
{
  UChar32 c = 0;
  int32_t i = 0, j = 0;
  char16_t buff[256];
  UBool isError = false;
  while(i < sLen) {
    U16_NEXT(string, i, sLen, c);
    if(u_isUUppercase(c)) {
      c = u_tolower(c);
    } else if(u_isULowercase(c)) {
      c = u_toupper(c);
    }
    U16_APPEND(buff, j, 256, c, isError);
  }
  buff[j] = 0;
  u_strcpy(string, buff);
  sLen = j;
}


void
Line::swapCase()
{
  swapCase(name, len);
  swapCase(expansionString, expLen);
}

UnicodeString
Line::dumpSortkey() 
{

  char buffer[256];
  char *buff = buffer;
  *buff = 0;
  uint8_t *key = sortKey;
  if(sortKey) {
    while(*key) {
      sprintf(buff, "%02X ", *key);
      key++;
      buff += 3;
      if(buff - buffer > 252) {
        break;
      }
    }
  }
  return UnicodeString(buffer);
}