#if HAVE_CONFIG_H
# include <config.h>
#endif
#if HAVE_LIBXML
# include <libxml++/libxml++.h>
#elif HAVE_TINYXML2
# include <tinyxml2.h>
#endif
#include <fstream>
#include <string>
#include <map>
#include <cstring>
#include <sys/stat.h>
using std::string;
using std::map;
#include "ospell.h"
#include "hfst-ol.h"
#include "ZHfstOspeller.h"
#include "ZHfstOspellerXmlMetadata.h"
namespace hfst_ospell
{
/**
 * Create empty metadata whose locale starts out as the placeholder "und".
 */
ZHfstOspellerXmlMetadata::ZHfstOspellerXmlMetadata()
{
    // "und" is the value the locale parsers in this file compare against
    // before warning about a locale mismatch.
    info_.locale_ = string("und");
}
/**
 * Check that an automaton id contains at least two '.' separators.
 *
 * @param id NUL-terminated id string; must not be null.
 * @return true when a second '.' exists somewhere after the first one.
 */
static
bool
validate_automaton_id(const char* id)
{
    const char* first_dot = strchr(id, '.');
    if (first_dot != NULL)
    {
        // The second separator is only searched for past the first one.
        return strchr(first_dot + 1, '.') != NULL;
    }
    return false;
}
/**
 * Extract the description part of an automaton id, i.e. the text strictly
 * between the first and the second '.' ("default" for
 * "acceptor.default.hfst").
 *
 * @param id NUL-terminated id string; must not be null.
 * @return buffer produced by hfst_strndup(), which the callers in this file
 *         release with free(); NULL when @p id lacks two '.' separators.
 */
static
char*
get_automaton_descr_from_id(const char* id)
{
    const char* p = strchr(id, '.');
    if (p == NULL)
    {
        return NULL;
    }
    const char* q = strchr(p + 1, '.');
    if (q == NULL)
    {
        return NULL;
    }
    // The description occupies (p, q) exclusive, so it is q - p - 1 characters
    // long; copying q - p characters would also take the trailing '.'.
    // NOTE(review): assumes hfst_strndup(s, n) copies exactly n characters
    // like strndup() - confirm against its definition.
    return hfst_strndup(p + 1, static_cast<size_t>(q - p - 1));
}
#if HAVE_LIBXML
/**
 * Validate the document root: it must be an <hfstspeller> element carrying
 * hfstversion="3" and dtdversion="1.0".
 *
 * @param rootNode root node of the parsed document.
 * @throws ZHfstMetaDataParsingError when any of the checks fails.
 */
void
ZHfstOspellerXmlMetadata::verify_hfstspeller(xmlpp::Node* rootNode)
{
    xmlpp::Element* const root = dynamic_cast<xmlpp::Element*>(rootNode);
    if (!root)
    {
        throw ZHfstMetaDataParsingError("Root node is not an element");
    }
    if (root->get_name() != "hfstspeller")
    {
        throw ZHfstMetaDataParsingError("could not find <hfstspeller> "
                                        "root from XML file");
    }

    // HFST version: attribute must exist and must equal "3".
    const xmlpp::Attribute* const hfstAttr = root->get_attribute("hfstversion");
    if (!hfstAttr)
    {
        throw ZHfstMetaDataParsingError("No hfstversion attribute in root");
    }
    if (hfstAttr->get_value() != "3")
    {
        throw ZHfstMetaDataParsingError("Unrecognised HFST version...");
    }

    // DTD version: attribute must exist and must equal "1.0".
    const xmlpp::Attribute* const dtdAttr = root->get_attribute("dtdversion");
    if (!dtdAttr)
    {
        throw ZHfstMetaDataParsingError("No dtdversion attribute in root");
    }
    if (dtdAttr->get_value() != "1.0")
    {
        throw ZHfstMetaDataParsingError("Unrecognised DTD version...");
    }
}
/**
 * Walk the children of the <info> node and hand each recognised child to its
 * dedicated parser.
 *
 * Children with an unrecognised name are reported on stderr unless they are
 * whitespace-only text nodes.
 *
 * @param infoNode the <info> node whose children are processed.
 */
void
ZHfstOspellerXmlMetadata::parse_info(xmlpp::Node* infoNode)
{
    xmlpp::Node::NodeList children = infoNode->get_children();
    for (auto& child : children)
    {
        const Glib::ustring name = child->get_name();
        if (name == "locale")      { parse_locale(child);      continue; }
        if (name == "title")       { parse_title(child);       continue; }
        if (name == "description") { parse_description(child); continue; }
        if (name == "version")     { parse_version(child);     continue; }
        if (name == "date")        { parse_date(child);        continue; }
        if (name == "producer")    { parse_producer(child);    continue; }
        if (name == "contact")     { parse_contact(child);     continue; }

        // Anything else is only tolerated silently when it is blank text.
        const xmlpp::TextNode* asText = dynamic_cast<xmlpp::TextNode*>(child);
        const bool blankText = (asText != NULL) && asText->is_white_space();
        if (!blankText)
        {
            fprintf(stderr, "DEBUG: unknown info child %s\n",
                    name.c_str());
        }
    }
}
/**
 * Store the text of a <locale> node as the metadata locale.
 *
 * A warning is printed to stderr when a locale other than the placeholder
 * "und" was already set and differs from the XML value; the XML value is
 * stored either way.
 *
 * @param localeNode the <locale> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_locale(xmlpp::Node* localeNode)
{
    xmlpp::Element* localeElement = dynamic_cast<xmlpp::Element*>(localeNode);
    // dynamic_cast yields NULL for a non-element node; dereferencing it
    // would crash.
    if (NULL == localeElement)
    {
        throw ZHfstXmlParsingError("<locale> is not an element");
    }
    const xmlpp::TextNode* text = localeElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<locale> must be non-empty");
    }
    const Glib::ustring localeContent = text->get_content();
    if ((info_.locale_ != "und") && (info_.locale_ != localeContent))
    {
        fprintf(stderr, "Warning: mismatched languages in "
                "file data (%s) and XML (%s)\n",
                info_.locale_.c_str(), localeContent.c_str());
    }
    info_.locale_ = localeContent;
}
/**
 * Store the text of an <info>-level <title> node, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode the <title> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(xmlpp::Node* titleNode)
{
    xmlpp::Element* titleElement = dynamic_cast<xmlpp::Element*>(titleNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (NULL == titleElement)
    {
        throw ZHfstXmlParsingError("<title> is not an element");
    }
    const xmlpp::TextNode* text = titleElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const xmlpp::Attribute* lang = titleElement->get_attribute("lang");
    if (lang != NULL)
    {
        info_.title_[lang->get_value()] = text->get_content();
    }
    else
    {
        info_.title_[info_.locale_] = text->get_content();
    }
}
/**
 * Store the text of an <info>-level <description> node, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(xmlpp::Node* descriptionNode)
{
    xmlpp::Element* descriptionElement =
        dynamic_cast<xmlpp::Element*>(descriptionNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (NULL == descriptionElement)
    {
        throw ZHfstXmlParsingError("<description> is not an element");
    }
    const xmlpp::TextNode* text = descriptionElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const xmlpp::Attribute* lang = descriptionElement->get_attribute("lang");
    if (lang != NULL)
    {
        info_.description_[lang->get_value()] = text->get_content();
    }
    else
    {
        info_.description_[info_.locale_] = text->get_content();
    }
}
/**
 * Store the text of a <version> node and, when present, its "vcsrev"
 * attribute.
 *
 * @param versionNode the <version> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no
 *         text; nothing is stored in that case.
 */
void
ZHfstOspellerXmlMetadata::parse_version(xmlpp::Node* versionNode)
{
    xmlpp::Element* versionElement = dynamic_cast<xmlpp::Element*>(versionNode);
    if (NULL == versionElement)
    {
        throw ZHfstXmlParsingError("<version> is not an element");
    }
    // get_child_text() is NULL for an empty element (the sibling parsers in
    // this file check for that); report it instead of dereferencing NULL.
    const xmlpp::TextNode* text = versionElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<version> must be non-empty");
    }
    const xmlpp::Attribute* revision = versionElement->get_attribute("vcsrev");
    if (revision != NULL)
    {
        info_.vcsrev_ = revision->get_value();
    }
    info_.version_ = text->get_content();
}
/**
 * Store the text of a <date> node.
 *
 * @param dateNode the <date> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_date(xmlpp::Node* dateNode)
{
    xmlpp::Element* dateElement =
        dynamic_cast<xmlpp::Element*>(dateNode);
    if (NULL == dateElement)
    {
        throw ZHfstXmlParsingError("<date> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = dateElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<date> must be non-empty");
    }
    info_.date_ = text->get_content();
}
/**
 * Store the text of a <producer> node.
 *
 * @param producerNode the <producer> node.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_producer(xmlpp::Node* producerNode)
{
    xmlpp::Element* producerElement =
        dynamic_cast<xmlpp::Element*>(producerNode);
    if (NULL == producerElement)
    {
        throw ZHfstXmlParsingError("<producer> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = producerElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<producer> must be non-empty");
    }
    info_.producer_ = text->get_content();
}
/**
 * Store the optional "email" and "website" attributes of a <contact> node.
 * Attributes that are absent leave the corresponding field untouched.
 *
 * @param contactNode the <contact> node.
 * @throws ZHfstXmlParsingError when the node is not an element.
 */
void
ZHfstOspellerXmlMetadata::parse_contact(xmlpp::Node* contactNode)
{
    xmlpp::Element* contactElement = dynamic_cast<xmlpp::Element*>(contactNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (NULL == contactElement)
    {
        throw ZHfstXmlParsingError("<contact> is not an element");
    }
    const xmlpp::Attribute* email = contactElement->get_attribute("email");
    if (email != NULL)
    {
        info_.email_ = email->get_value();
    }
    const xmlpp::Attribute* website = contactElement->get_attribute("website");
    if (website != NULL)
    {
        info_.website_ = website->get_value();
    }
}
/**
 * Parse an <acceptor> node: its "id" (mandatory), "transtype" and "type"
 * attributes plus its <title> and <description> children.
 *
 * The entry is stored in acceptor_ under the description extracted from the
 * id by get_automaton_descr_from_id(). Unknown children are reported on
 * stderr unless they are whitespace-only text nodes.
 *
 * @param acceptorNode the <acceptor> node.
 * @throws ZHfstMetaDataParsingError when the node is not an element or the id
 *         is missing or malformed.
 */
void
ZHfstOspellerXmlMetadata::parse_acceptor(xmlpp::Node* acceptorNode)
{
    xmlpp::Element* acceptorElement =
        dynamic_cast<xmlpp::Element*>(acceptorNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (acceptorElement == NULL)
    {
        throw ZHfstMetaDataParsingError("<acceptor> is not an element");
    }
    const xmlpp::Attribute* xid = acceptorElement->get_attribute("id");
    if (xid == NULL)
    {
        throw ZHfstMetaDataParsingError("id missing in acceptor");
    }
    const Glib::ustring xidValue = xid->get_value();
    if (validate_automaton_id(xidValue.c_str()) == false)
    {
        throw ZHfstMetaDataParsingError("Invalid id in acceptor");
    }
    char* rawDescr = get_automaton_descr_from_id(xidValue.c_str());
    if (rawDescr == NULL)
    {
        throw ZHfstMetaDataParsingError("Invalid id in acceptor");
    }
    // Copy into a std::string and release the C buffer right away, so it
    // cannot leak when one of the child parsers below throws.
    const string descr(rawDescr);
    free(rawDescr);

    // One lookup; the reference is reused for all attribute assignments.
    auto& acceptor = acceptor_[descr];
    acceptor.descr_ = descr;
    acceptor.id_ = xidValue;
    const xmlpp::Attribute* trtype =
        acceptorElement->get_attribute("transtype");
    if (trtype != NULL)
    {
        acceptor.transtype_ = trtype->get_value();
    }
    const xmlpp::Attribute* xtype = acceptorElement->get_attribute("type");
    if (xtype != NULL)
    {
        acceptor.type_ = xtype->get_value();
    }
    xmlpp::Node::NodeList accs = acceptorNode->get_children();
    for (auto& acc : accs)
    {
        const Glib::ustring accName = acc->get_name();
        if (accName == "title")
        {
            parse_title(acc, descr);
        }
        else if (accName == "description")
        {
            parse_description(acc, descr);
        }
        else
        {
            const xmlpp::TextNode* text = dynamic_cast<xmlpp::TextNode*>(acc);
            if ((text == NULL) || (!text->is_white_space()))
            {
                fprintf(stderr, "DEBUG: unknown acceptor node %s\n",
                        accName.c_str());
            }
        }
    }
}
/**
 * Store the text of an acceptor-level <title> node, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode the <title> node.
 * @param descr     key of the acceptor entry in acceptor_.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(xmlpp::Node* titleNode,
                                      const string& descr)
{
    xmlpp::Element* titleElement = dynamic_cast<xmlpp::Element*>(titleNode);
    if (NULL == titleElement)
    {
        throw ZHfstXmlParsingError("<title> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = titleElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const xmlpp::Attribute* lang = titleElement->get_attribute("lang");
    if (lang != NULL)
    {
        acceptor_[descr].title_[lang->get_value()] = text->get_content();
    }
    else
    {
        acceptor_[descr].title_[info_.locale_] = text->get_content();
    }
}
/**
 * Store the text of an acceptor-level <description> node, keyed by its
 * "lang" attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> node.
 * @param descr           key of the acceptor entry in acceptor_.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(xmlpp::Node* descriptionNode,
                                            const string& descr)
{
    xmlpp::Element* descriptionElement =
        dynamic_cast<xmlpp::Element*>(descriptionNode);
    if (NULL == descriptionElement)
    {
        throw ZHfstXmlParsingError("<description> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = descriptionElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const xmlpp::Attribute* lang = descriptionElement->get_attribute("lang");
    if (lang != NULL)
    {
        acceptor_[descr].description_[lang->get_value()] = text->get_content();
    }
    else
    {
        acceptor_[descr].description_[info_.locale_] = text->get_content();
    }
}
/**
 * Parse an <errmodel> node: append a new entry to errmodel_, fill in its id
 * and description, then process the <title>, <description>, <type> and
 * <model> children.
 *
 * Unknown children are reported on stderr unless they are whitespace-only
 * text nodes.
 *
 * @param errmodelNode the <errmodel> node.
 * @throws ZHfstMetaDataParsingError when the node is not an element or the id
 *         is missing or malformed.
 */
void
ZHfstOspellerXmlMetadata::parse_errmodel(xmlpp::Node* errmodelNode)
{
    xmlpp::Element* errmodelElement =
        dynamic_cast<xmlpp::Element*>(errmodelNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (errmodelElement == NULL)
    {
        throw ZHfstMetaDataParsingError("<errmodel> is not an element");
    }
    const xmlpp::Attribute* xid = errmodelElement->get_attribute("id");
    if (xid == NULL)
    {
        throw ZHfstMetaDataParsingError("id missing in errmodel");
    }
    const Glib::ustring xidValue = xid->get_value();
    if (validate_automaton_id(xidValue.c_str()) == false)
    {
        throw ZHfstMetaDataParsingError("Invalid id in errmodel");
    }
    errmodel_.push_back(ZHfstOspellerErrModelMetadata());
    const size_t errm_count = errmodel_.size() - 1;
    // The C buffer is copied into the entry and released before any child
    // parser (which may throw) runs.
    char* descr = get_automaton_descr_from_id(xidValue.c_str());
    if (descr != NULL)
    {
        errmodel_[errm_count].descr_ = descr;
        free(descr);
    }
    errmodel_[errm_count].id_ = xidValue;
    xmlpp::Node::NodeList errms = errmodelNode->get_children();
    for (auto& errm : errms)
    {
        const Glib::ustring errmName = errm->get_name();
        if (errmName == "title")
        {
            parse_title(errm, errm_count);
        }
        else if (errmName == "description")
        {
            parse_description(errm, errm_count);
        }
        else if (errmName == "type")
        {
            parse_type(errm, errm_count);
        }
        else if (errmName == "model")
        {
            parse_model(errm, errm_count);
        }
        else
        {
            const xmlpp::TextNode* text = dynamic_cast<xmlpp::TextNode*>(errm);
            if ((text == NULL) || (!text->is_white_space()))
            {
                fprintf(stderr, "DEBUG: unknown errmodel node %s\n",
                        errmName.c_str());
            }
        }
    }
}
/**
 * Store the text of an errmodel-level <title> node, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode  the <title> node.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(xmlpp::Node* titleNode,
                                      size_t errm_count)
{
    xmlpp::Element* titleElement = dynamic_cast<xmlpp::Element*>(titleNode);
    if (NULL == titleElement)
    {
        throw ZHfstXmlParsingError("<title> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = titleElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const xmlpp::Attribute* lang = titleElement->get_attribute("lang");
    if (lang != NULL)
    {
        errmodel_[errm_count].title_[lang->get_value()] = text->get_content();
    }
    else
    {
        errmodel_[errm_count].title_[info_.locale_] = text->get_content();
    }
}
/**
 * Store the text of an errmodel-level <description> node, keyed by its
 * "lang" attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> node.
 * @param errm_count      index of the error model entry in errmodel_.
 * @throws ZHfstXmlParsingError when the node is not an element or has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(xmlpp::Node* descriptionNode,
                                            size_t errm_count)
{
    xmlpp::Element* descriptionElement =
        dynamic_cast<xmlpp::Element*>(descriptionNode);
    if (NULL == descriptionElement)
    {
        throw ZHfstXmlParsingError("<description> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = descriptionElement->get_child_text();
    if (NULL == text)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const xmlpp::Attribute* lang = descriptionElement->get_attribute("lang");
    if (lang != NULL)
    {
        errmodel_[errm_count].description_[lang->get_value()] =
            text->get_content();
    }
    else
    {
        errmodel_[errm_count].description_[info_.locale_] =
            text->get_content();
    }
}
/**
 * Append the "type" attribute of a <type> node to the error model's type
 * list.
 *
 * @param typeNode   the <type> node.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstMetaDataParsingError when the node is not an element or the
 *         "type" attribute is missing.
 */
void
ZHfstOspellerXmlMetadata::parse_type(xmlpp::Node* typeNode, size_t errm_count)
{
    xmlpp::Element* typeElement = dynamic_cast<xmlpp::Element*>(typeNode);
    // Guard the cast: a non-element node would otherwise be dereferenced.
    if (typeElement == NULL)
    {
        throw ZHfstMetaDataParsingError("<type> is not an element");
    }
    const xmlpp::Attribute* xtype = typeElement->get_attribute("type");
    if (xtype == NULL)
    {
        throw ZHfstMetaDataParsingError("No type in type");
    }
    errmodel_[errm_count].type_.push_back(xtype->get_value());
}
/**
 * Append the text of a <model> node to the error model's model list.
 *
 * @param modelNode  the <model> node.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstMetaDataParsingError when the node is not an element or has no
 *         text.
 */
void
ZHfstOspellerXmlMetadata::parse_model(xmlpp::Node* modelNode, size_t errm_count)
{
    xmlpp::Element* modelElement = dynamic_cast<xmlpp::Element*>(modelNode);
    if (modelElement == NULL)
    {
        throw ZHfstMetaDataParsingError("<model> is not an element");
    }
    // An empty element has no text child; avoid dereferencing NULL.
    const xmlpp::TextNode* text = modelElement->get_child_text();
    if (text == NULL)
    {
        throw ZHfstMetaDataParsingError("<model> must be non-empty");
    }
    errmodel_[errm_count].model_.push_back(text->get_content());
}
/**
 * Fill the metadata from a parsed document: verify the root, then dispatch
 * its <info>, <acceptor> and <errmodel> children to their parsers.
 *
 * Unknown children are reported on stderr unless they are whitespace-only
 * text nodes.
 *
 * @param doc parsed document.
 * @throws ZHfstMetaDataParsingError when @p doc is NULL or has no root node;
 *         the called parsers may throw as well.
 */
void
ZHfstOspellerXmlMetadata::parse_xml(const xmlpp::Document* doc)
{
    if (!doc)
    {
        throw ZHfstMetaDataParsingError("Cannot parse XML data");
    }
    xmlpp::Node* const root = doc->get_root_node();
    if (!root)
    {
        throw ZHfstMetaDataParsingError("No root node in index XML");
    }
    verify_hfstspeller(root);

    xmlpp::Node::NodeList children = root->get_children();
    for (auto& child : children)
    {
        const Glib::ustring name = child->get_name();
        if (name == "info")     { parse_info(child);     continue; }
        if (name == "acceptor") { parse_acceptor(child); continue; }
        if (name == "errmodel") { parse_errmodel(child); continue; }

        // Anything else is only tolerated silently when it is blank text.
        const xmlpp::TextNode* asText = dynamic_cast<xmlpp::TextNode*>(child);
        const bool blankText = (asText != NULL) && asText->is_white_space();
        if (!blankText)
        {
            fprintf(stderr, "DEBUG: unknown root child %s\n",
                    name.c_str());
        }
    }
}
void
ZHfstOspellerXmlMetadata::read_xml(const char* xml_data, size_t xml_len)
{
xmlpp::DomParser parser;
parser.set_substitute_entities();
parser.parse_memory_raw(reinterpret_cast<const unsigned char*>(xml_data),
xml_len);
this->parse_xml(parser.get_document());
}
void
ZHfstOspellerXmlMetadata::read_xml(const string& filename)
{
xmlpp::DomParser parser;
parser.set_substitute_entities();
parser.parse_file(filename);
this->parse_xml(parser.get_document());
}
#elif HAVE_TINYXML2
/**
 * Fill the metadata from a parsed document: check and verify the
 * <hfstspeller> root, then dispatch its <info>, <acceptor> and <errmodel>
 * child elements to their parsers. Other child elements are reported on
 * stderr.
 *
 * @param doc parsed document.
 * @throws ZHfstMetaDataParsingError when there is no root element or it is
 *         not named "hfstspeller"; the called parsers may throw as well.
 */
void
ZHfstOspellerXmlMetadata::parse_xml(const tinyxml2::XMLDocument& doc)
{
    const tinyxml2::XMLElement* const root = doc.RootElement();
    if (root == NULL)
    {
        throw ZHfstMetaDataParsingError("No root node in index XML");
    }
    if (strcmp(root->Name(), "hfstspeller") != 0)
    {
        throw ZHfstMetaDataParsingError("could not find <hfstspeller> "
                                        "root from XML file");
    }
    verify_hfstspeller(*root);

    for (const tinyxml2::XMLElement* child = root->FirstChildElement();
         child != NULL;
         child = child->NextSiblingElement())
    {
        const char* const name = child->Name();
        if (strcmp(name, "info") == 0)
        {
            parse_info(*child);
        }
        else if (strcmp(name, "acceptor") == 0)
        {
            parse_acceptor(*child);
        }
        else if (strcmp(name, "errmodel") == 0)
        {
            parse_errmodel(*child);
        }
        else
        {
            fprintf(stderr, "DEBUG: Unknown root child %s\n",
                    name);
        }
    }
}
/**
 * Check that the root element carries hfstversion="3" and dtdversion="1.0".
 *
 * @param hfstspellerNode the root element.
 * @throws ZHfstMetaDataParsingError when an attribute is missing or has a
 *         different value.
 */
void
ZHfstOspellerXmlMetadata::verify_hfstspeller(const tinyxml2::XMLElement& hfstspellerNode)
{
    // Presence first, then value, so each failure gets its own message.
    if (hfstspellerNode.Attribute("hfstversion") == NULL)
    {
        throw ZHfstMetaDataParsingError("No hfstversion attribute in root");
    }
    if (hfstspellerNode.Attribute("hfstversion", "3") == NULL)
    {
        throw ZHfstMetaDataParsingError("Unrecognised HFST version...");
    }

    if (hfstspellerNode.Attribute("dtdversion") == NULL)
    {
        throw ZHfstMetaDataParsingError("No dtdversion attribute in root");
    }
    if (hfstspellerNode.Attribute("dtdversion", "1.0") == NULL)
    {
        throw ZHfstMetaDataParsingError("Unrecognised DTD version...");
    }
}
/**
 * Walk the child elements of <info> and hand each recognised one to its
 * dedicated parser; other child elements are reported on stderr.
 *
 * @param infoNode the <info> element.
 */
void
ZHfstOspellerXmlMetadata::parse_info(const tinyxml2::XMLElement& infoNode)
{
    for (const tinyxml2::XMLElement* child = infoNode.FirstChildElement();
         child != NULL;
         child = child->NextSiblingElement())
    {
        const char* const name = child->Name();
        if (strcmp(name, "locale") == 0)
        {
            parse_locale(*child);
        }
        else if (strcmp(name, "title") == 0)
        {
            parse_title(*child);
        }
        else if (strcmp(name, "description") == 0)
        {
            parse_description(*child);
        }
        else if (strcmp(name, "version") == 0)
        {
            parse_version(*child);
        }
        else if (strcmp(name, "date") == 0)
        {
            parse_date(*child);
        }
        else if (strcmp(name, "producer") == 0)
        {
            parse_producer(*child);
        }
        else if (strcmp(name, "contact") == 0)
        {
            parse_contact(*child);
        }
        else
        {
            fprintf(stderr, "DEBUG: unknown info child %s\n",
                    name);
        }
    }
}
/**
 * Store the text of a <locale> element as the metadata locale.
 *
 * A warning goes to stderr when a locale other than the placeholder "und"
 * was already set and differs from the XML value; the XML value is stored
 * either way.
 *
 * @param localeNode the <locale> element.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_locale(const tinyxml2::XMLElement& localeNode)
{
    const char* const text = localeNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<locale> must be non-empty");
    }
    const bool alreadySet = (info_.locale_ != "und");
    if (alreadySet && (info_.locale_ != text))
    {
        fprintf(stderr, "Warning: mismatched languages in "
                "file data (%s) and XML (%s)\n",
                info_.locale_.c_str(), text);
    }
    info_.locale_ = text;
}
/**
 * Store the text of an <info>-level <title> element, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode the <title> element.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(const tinyxml2::XMLElement& titleNode)
{
    const char* const text = titleNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const char* const lang = titleNode.Attribute("lang");
    if (lang == NULL)
    {
        info_.title_[info_.locale_] = text;
        return;
    }
    info_.title_[lang] = text;
}
/**
 * Store the text of an <info>-level <description> element, keyed by its
 * "lang" attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> element.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(const tinyxml2::XMLElement& descriptionNode)
{
    const char* const text = descriptionNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const char* const lang = descriptionNode.Attribute("lang");
    if (lang == NULL)
    {
        info_.description_[info_.locale_] = text;
        return;
    }
    info_.description_[lang] = text;
}
/**
 * Store the text of a <version> element and, when present, its "vcsrev"
 * attribute.
 *
 * @param versionNode the <version> element.
 * @throws ZHfstXmlParsingError when the element has no text; nothing is
 *         stored in that case.
 */
void
ZHfstOspellerXmlMetadata::parse_version(const tinyxml2::XMLElement& versionNode)
{
    // GetText() can be NULL (the sibling parsers in this file check for it);
    // assigning a null pointer to a std::string is undefined behaviour.
    const char* text = versionNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<version> must be non-empty");
    }
    const char* vcsrev = versionNode.Attribute("vcsrev");
    if (vcsrev != NULL)
    {
        info_.vcsrev_ = vcsrev;
    }
    info_.version_ = text;
}
/**
 * Store the text of a <date> element.
 *
 * @param dateNode the <date> element.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_date(const tinyxml2::XMLElement& dateNode)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = dateNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<date> must be non-empty");
    }
    info_.date_ = text;
}
/**
 * Store the text of a <producer> element.
 *
 * @param producerNode the <producer> element.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_producer(const tinyxml2::XMLElement& producerNode)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = producerNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<producer> must be non-empty");
    }
    info_.producer_ = text;
}
/**
 * Store the optional "email" and "website" attributes of a <contact>
 * element. Attributes that are absent leave the corresponding field
 * untouched.
 *
 * @param contactNode the <contact> element.
 */
void
ZHfstOspellerXmlMetadata::parse_contact(const tinyxml2::XMLElement& contactNode)
{
    const char* const email = contactNode.Attribute("email");
    if (email != NULL)
    {
        info_.email_ = email;
    }
    const char* const website = contactNode.Attribute("website");
    if (website != NULL)
    {
        info_.website_ = website;
    }
}
/**
 * Parse an <acceptor> element: its "id" (mandatory), "trtype" and "type"
 * attributes plus its <title> and <description> child elements.
 *
 * The entry is stored in acceptor_ under the description extracted from the
 * id by get_automaton_descr_from_id(). Other child elements are reported on
 * stderr.
 *
 * @param acceptorNode the <acceptor> element.
 * @throws ZHfstMetaDataParsingError when the id is missing or malformed.
 */
void
ZHfstOspellerXmlMetadata::parse_acceptor(const tinyxml2::XMLElement& acceptorNode)
{
    const char* xid = acceptorNode.Attribute("id");
    if (xid == NULL)
    {
        throw ZHfstMetaDataParsingError("id missing in acceptor");
    }
    if (validate_automaton_id(xid) == false)
    {
        throw ZHfstMetaDataParsingError("Invalid id in acceptor");
    }
    char* rawDescr = get_automaton_descr_from_id(xid);
    if (rawDescr == NULL)
    {
        throw ZHfstMetaDataParsingError("Invalid id in acceptor");
    }
    // Copy into a std::string and release the C buffer right away, so it
    // cannot leak when one of the child parsers below throws.
    const std::string descr(rawDescr);
    free(rawDescr);

    // One lookup; the reference is reused for all attribute assignments.
    auto& acceptor = acceptor_[descr];
    acceptor.descr_ = descr;
    acceptor.id_ = xid;
    // NOTE(review): the libxml++ variant of this function reads "transtype"
    // instead of "trtype" - confirm which attribute name the format defines.
    const char* trtype = acceptorNode.Attribute("trtype");
    if (trtype != NULL)
    {
        acceptor.transtype_ = trtype;
    }
    const char* xtype = acceptorNode.Attribute("type");
    if (xtype != NULL)
    {
        acceptor.type_ = xtype;
    }
    for (const tinyxml2::XMLElement* acc = acceptorNode.FirstChildElement();
         acc != NULL;
         acc = acc->NextSiblingElement())
    {
        if (strcmp(acc->Name(), "title") == 0)
        {
            parse_title(*acc, descr);
        }
        else if (strcmp(acc->Name(), "description") == 0)
        {
            parse_description(*acc, descr);
        }
        else
        {
            fprintf(stderr, "DEBUG: unknown acceptor child %s\n",
                    acc->Name());
        }
    }
}
/**
 * Store the text of an acceptor-level <title> element, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode the <title> element.
 * @param accName   key of the acceptor entry in acceptor_.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(const tinyxml2::XMLElement& titleNode,
                                      const std::string& accName)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = titleNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const char* lang = titleNode.Attribute("lang");
    if (lang != NULL)
    {
        acceptor_[accName].title_[lang] = text;
    }
    else
    {
        acceptor_[accName].title_[info_.locale_] = text;
    }
}
/**
 * Store the text of an acceptor-level <description> element, keyed by its
 * "lang" attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> element.
 * @param accName         key of the acceptor entry in acceptor_.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(const tinyxml2::XMLElement& descriptionNode,
                                            const std::string& accName)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = descriptionNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const char* lang = descriptionNode.Attribute("lang");
    if (lang != NULL)
    {
        acceptor_[accName].description_[lang] = text;
    }
    else
    {
        acceptor_[accName].description_[info_.locale_] = text;
    }
}
/**
 * Parse an <errmodel> element: append a new entry to errmodel_, fill in its
 * id and description, then process the <title>, <description>, <type> and
 * <model> child elements. Other child elements are reported on stderr.
 *
 * @param errmodelNode the <errmodel> element.
 * @throws ZHfstMetaDataParsingError when the id is missing or malformed.
 */
void
ZHfstOspellerXmlMetadata::parse_errmodel(const tinyxml2::XMLElement& errmodelNode)
{
    const char* const xid = errmodelNode.Attribute("id");
    if (xid == NULL)
    {
        throw ZHfstMetaDataParsingError("id missing in errmodel");
    }
    if (!validate_automaton_id(xid))
    {
        throw ZHfstMetaDataParsingError("Invalid id in errmodel");
    }
    char* descr = get_automaton_descr_from_id(xid);
    errmodel_.push_back(ZHfstOspellerErrModelMetadata());
    // Index of the entry just appended; the child parsers address it by index.
    const size_t errm_count = errmodel_.size() - 1;
    if (descr != NULL)
    {
        errmodel_[errm_count].descr_ = descr;
    }
    free(descr);
    errmodel_[errm_count].id_ = xid;

    for (const tinyxml2::XMLElement* errm = errmodelNode.FirstChildElement();
         errm != NULL;
         errm = errm->NextSiblingElement())
    {
        const char* const name = errm->Name();
        if (strcmp(name, "title") == 0)
        {
            parse_title(*errm, errm_count);
        }
        else if (strcmp(name, "description") == 0)
        {
            parse_description(*errm, errm_count);
        }
        else if (strcmp(name, "type") == 0)
        {
            parse_type(*errm, errm_count);
        }
        else if (strcmp(name, "model") == 0)
        {
            parse_model(*errm, errm_count);
        }
        else
        {
            fprintf(stderr, "DEBUG: unknown errmodel child %s\n",
                    name);
        }
    }
}
/**
 * Store the text of an errmodel-level <title> element, keyed by its "lang"
 * attribute or, when that is absent, by the current locale.
 *
 * @param titleNode  the <title> element.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_title(const tinyxml2::XMLElement& titleNode, size_t errm_count)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = titleNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<title> must be non-empty");
    }
    const char* lang = titleNode.Attribute("lang");
    if (lang != NULL)
    {
        errmodel_[errm_count].title_[lang] = text;
    }
    else
    {
        errmodel_[errm_count].title_[info_.locale_] = text;
    }
}
/**
 * Store the text of an errmodel-level <description> element, keyed by its
 * "lang" attribute or, when that is absent, by the current locale.
 *
 * @param descriptionNode the <description> element.
 * @param errm_count      index of the error model entry in errmodel_.
 * @throws ZHfstXmlParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_description(const tinyxml2::XMLElement& descriptionNode, size_t errm_count)
{
    // A NULL GetText() must not reach std::string's assignment operator.
    const char* text = descriptionNode.GetText();
    if (text == NULL)
    {
        throw ZHfstXmlParsingError("<description> must be non-empty");
    }
    const char* lang = descriptionNode.Attribute("lang");
    if (lang != NULL)
    {
        errmodel_[errm_count].description_[lang] = text;
    }
    else
    {
        errmodel_[errm_count].description_[info_.locale_] = text;
    }
}
/**
 * Append the "type" attribute of a <type> element to the error model's type
 * list.
 *
 * @param typeNode   the <type> element.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstMetaDataParsingError when the "type" attribute is missing.
 */
void
ZHfstOspellerXmlMetadata::parse_type(const tinyxml2::XMLElement& typeNode, size_t errm_count)
{
    const char* const xtype = typeNode.Attribute("type");
    if (xtype == NULL)
    {
        throw ZHfstMetaDataParsingError("No type in type");
    }
    errmodel_[errm_count].type_.push_back(xtype);
}
/**
 * Append the text of a <model> element to the error model's model list.
 *
 * @param modelNode  the <model> element.
 * @param errm_count index of the error model entry in errmodel_.
 * @throws ZHfstMetaDataParsingError when the element has no text.
 */
void
ZHfstOspellerXmlMetadata::parse_model(const tinyxml2::XMLElement& modelNode, size_t errm_count)
{
    // A NULL GetText() would be converted to a std::string by push_back(),
    // which is undefined behaviour.
    const char* text = modelNode.GetText();
    if (text == NULL)
    {
        throw ZHfstMetaDataParsingError("<model> must be non-empty");
    }
    errmodel_[errm_count].model_.push_back(text);
}
/**
 * Parse XML held in memory and fill the metadata from it.
 *
 * @param xml_data start of the XML buffer.
 * @param xml_len  number of bytes in the buffer.
 * @throws ZHfstMetaDataParsingError when tinyxml2 reports a parse error.
 */
void
ZHfstOspellerXmlMetadata::read_xml(const char* xml_data, size_t xml_len)
{
    tinyxml2::XMLDocument doc;
    // XML_SUCCESS is the same zero code as the older XML_NO_ERROR alias,
    // which newer tinyxml2 releases no longer provide.
    if (doc.Parse(xml_data, xml_len) != tinyxml2::XML_SUCCESS)
    {
        throw ZHfstMetaDataParsingError("Reading XML from memory");
    }
    this->parse_xml(doc);
}
/**
 * Load the XML file at @p filename and fill the metadata from it.
 *
 * @param filename path of the XML file.
 * @throws ZHfstMetaDataParsingError when tinyxml2 cannot load or parse it.
 */
void
ZHfstOspellerXmlMetadata::read_xml(const string& filename)
{
    tinyxml2::XMLDocument doc;
    // XML_SUCCESS is the same zero code as the older XML_NO_ERROR alias,
    // which newer tinyxml2 releases no longer provide.
    if (doc.LoadFile(filename.c_str()) != tinyxml2::XML_SUCCESS)
    {
        throw ZHfstMetaDataParsingError("Reading XML from file");
    }
    this->parse_xml(doc);
}
#else
void
ZHfstOspellerXmlMetadata::read_xml(const char* xml_data, size_t xml_len)
{
auto b = strstr(xml_data, "<locale>");
auto e = strstr(b, "</locale>");
if (b == nullptr || e == nullptr || b >= e) {
throw ZHfstMetaDataParsingError("Could not find <locale>...</locale> in the XML");
}
info_.locale_.assign(b + 8, e);
b = strstr(xml_data, "<title>");
e = strstr(b, "</title>");
if (b == nullptr || e == nullptr || b >= e) {
throw ZHfstMetaDataParsingError("Could not find <title>...</title> in the XML");
}
info_.title_[info_.locale_].assign(b + 7, e);
b = strstr(xml_data, "<description>");
e = strstr(b, "</description>");
if (b == nullptr || e == nullptr || b >= e) {
throw ZHfstMetaDataParsingError("Could not find <description>...</description> in the XML");
}
info_.description_[info_.locale_].assign(b + 13, e);
}
void
ZHfstOspellerXmlMetadata::read_xml(const std::string& filename)
{
std::ifstream file(filename, std::ios::binary);
struct stat st;
stat(filename.c_str(), &st);
std::string xml(static_cast<size_t>(st.st_size), 0);
file.read(&xml[0], static_cast<size_t>(st.st_size));
read_xml(xml.c_str(), xml.size());
}
#endif
/**
 * Render all stored metadata as a multi-line, human-readable string: the
 * info fields first, then every acceptor and every error model with their
 * titles and descriptions (plus types and models for error models).
 *
 * @return the formatted dump.
 */
string
ZHfstOspellerXmlMetadata::debug_dump() const
{
    string retval = "locale: " + info_.locale_ + "\n";
    retval += "version: " + info_.version_ + " [vcsrev: " + info_.vcsrev_ + "]\n";
    retval += "date: " + info_.date_ + "\n";
    retval += "producer: " + info_.producer_ + "[email: <" + info_.email_ +
              ">, website: <" + info_.website_ + ">]\n";
    for (const auto& title : info_.title_)
    {
        retval += "title [" + title.first + "]: " + title.second + "\n";
    }
    for (const auto& description : info_.description_)
    {
        retval += "description [" + description.first + "]: " +
                  description.second + "\n";
    }
    for (const auto& acc : acceptor_)
    {
        const auto& meta = acc.second;
        // ", " before "trtype" keeps the type value from running into the
        // next label.
        retval += "acceptor[" + meta.descr_ + "] [id: " + meta.id_ +
                  ", type: " + meta.type_ + ", trtype: " + meta.transtype_ +
                  "]\n";
        for (const auto& title : meta.title_)
        {
            retval += "title [" + title.first + "]: " + title.second + "\n";
        }
        for (const auto& description : meta.description_)
        {
            retval += "description[" + description.first + "]: " +
                      description.second + "\n";
        }
    }
    for (const auto& errm : errmodel_)
    {
        retval += "errmodel[" + errm.descr_ + "] [id: " + errm.id_ + "]\n";
        for (const auto& title : errm.title_)
        {
            retval += "title [" + title.first + "]: " + title.second + "\n";
        }
        for (const auto& description : errm.description_)
        {
            retval += "description[" + description.first + "]: " +
                      description.second + "\n";
        }
        for (const auto& type : errm.type_)
        {
            retval += "type: " + type + "\n";
        }
        for (const auto& model : errm.model_)
        {
            retval += "model: " + model + "\n";
        }
    }
    return retval;
}
}