* This file contains classes, typedefs and constants common to all
* hfst-optimized-lookup code. They are collected here to keep them out of
* the way of the actual ospell code.
*/
#ifndef HFST_OSPELL_HFST_OL_H_
#define HFST_OSPELL_HFST_OL_H_
#include "hfstol-stdafx.h"
#include <vector>
#include <map>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include <set>
#include <utility>
#include "ol-exceptions.h"
namespace hfst_ospell {
// ---- Scalar types --------------------------------------------------------

// Numeric identifier of a symbol (16-bit unsigned).
typedef uint16_t SymbolNumber;
// Index into the transducer tables (32-bit unsigned).
typedef uint32_t TransitionTableIndex;
// Value type of a flag diacritic (see FlagDiacriticOperation::value).
typedef short ValueNumber;
// Weights are single-precision floats.
typedef float Weight;

// ---- Containers over the scalar types ------------------------------------

typedef std::vector<SymbolNumber> SymbolVector;
// Table of symbol strings.
typedef std::vector<std::string> KeyTable;
// Lookup from a symbol string to its number.
typedef std::map<std::string, SymbolNumber> StringSymbolMap;

// ---- Forward declarations and containers that only need them -------------

class TransitionIndex;
class Transition;
class FlagDiacriticOperation;

typedef std::vector<TransitionIndex*> TransitionIndexVector;
typedef std::vector<Transition*> TransitionVector;
// Flag diacritic operation keyed by symbol number.
typedef std::map<SymbolNumber, FlagDiacriticOperation> OperationMap;

// ---- Sentinels and limits --------------------------------------------------

// Largest SymbolNumber value; used as the "no symbol" sentinel.
const SymbolNumber NO_SYMBOL = USHRT_MAX;
// Largest TransitionTableIndex value; used as the "no index" sentinel.
const TransitionTableIndex NO_TABLE_INDEX = UINT_MAX;
// "Infinite" weight: the NO_TABLE_INDEX sentinel converted to float.
const Weight INFINITE_WEIGHT = static_cast<float>(NO_TABLE_INDEX);
const unsigned int MAX_SYMBOL_BYTES = 1000;
// 2^31 (2147483648), i.e. only the top bit of a 32-bit index set.
// NOTE(review): presumably indices at or above this value refer to the
// transition (target) table rather than the index table -- confirm
// against the lookup code.
const TransitionTableIndex TARGET_TABLE = 0x80000000u;
// Operator kind of a flag diacritic operation; enumerators are numbered
// 0..5 in declaration order.
// NOTE(review): the letters presumably follow the usual flag diacritic
// operator names (P, N, R, D, C, U) -- confirm against the HFST
// documentation before relying on a specific meaning.
enum FlagDiacriticOperator {
    P,
    N,
    R,
    D,
    C,
    U
};
// Names of the boolean properties stored in a TransducerHeader; this is the
// argument type of TransducerHeader::probe_flag(). Enumerators are numbered
// 0..8 in declaration order.
enum HeaderFlag {
    Weighted,
    Deterministic,
    Input_deterministic,
    Minimized,
    Cyclic,
    Has_epsilon_epsilon_transitions,
    Has_input_epsilon_transitions,
    Has_input_epsilon_cycles,
    Has_unweighted_input_epsilon_cycles
};
// Free helper functions; all of them are only declared here and are
// defined elsewhere.

// Host byte-order query.
bool is_big_endian();

// Read one value from a FILE stream or from a raw character buffer.
// NOTE(review): going by the names, the bytes are swapped relative to how
// they are stored -- confirm in the implementation.
uint16_t read_uint16_flipping_endianness(FILE * f);
uint16_t read_uint16_flipping_endianness(char * raw);
uint32_t read_uint32_flipping_endianness(char * raw);
float read_float_flipping_endianness(FILE * f);

// Takes the cursor by address (char **), so the callee can move it.
// NOTE(review): presumably advances *raw past one NUL-terminated string
// -- confirm in the implementation.
void skip_c_string(char ** raw);
/**
 * Header data of a transducer: symbol counts, table sizes, state and
 * transition counts, and nine boolean properties.
 *
 * A header is built either from a FILE stream or from a raw memory cursor
 * (char **). All member functions are defined elsewhere.
 */
class TransducerHeader
{
public:
    TransducerHeader(FILE * f);
    TransducerHeader(char ** raw);

    SymbolNumber symbol_count();
    SymbolNumber input_symbol_count();
    TransitionTableIndex index_table_size();
    TransitionTableIndex target_table_size();

    // Query one boolean property by its HeaderFlag name.
    bool probe_flag(HeaderFlag flag);

private:
    // Counts and table sizes.
    SymbolNumber number_of_symbols;
    SymbolNumber number_of_input_symbols;
    TransitionTableIndex size_of_transition_index_table;
    TransitionTableIndex size_of_transition_target_table;
    TransitionTableIndex number_of_states;
    TransitionTableIndex number_of_transitions;

    // Boolean properties; the names mirror the HeaderFlag enumerators.
    bool weighted;
    bool deterministic;
    bool input_deterministic;
    bool minimized;
    bool cyclic;
    bool has_epsilon_epsilon_transitions;
    bool has_input_epsilon_transitions;
    bool has_input_epsilon_cycles;
    bool has_unweighted_input_epsilon_cycles;

    // Parsing helpers, one overload per input source.
    void read_property(bool &property, FILE * f);
    void read_property(bool &property, char ** raw);
    void skip_hfst3_header(FILE * f);
    void skip_hfst3_header(char ** f);
};
/**
 * Immutable (operator, feature, value) triple describing one flag
 * diacritic operation. All three fields are const, so objects cannot be
 * assigned to after construction.
 */
class FlagDiacriticOperation
{
public:
    // Build an operation from its three components.
    FlagDiacriticOperation(const FlagDiacriticOperator op,
                           const SymbolNumber feat,
                           const ValueNumber val)
        : operation(op),
          feature(feat),
          value(val)
    {}

    // Default: operator P, feature NO_SYMBOL, value 0.
    FlagDiacriticOperation()
        : operation(P),
          feature(NO_SYMBOL),
          value(0)
    {}

    // Defined elsewhere.
    bool isFlag() const;
    FlagDiacriticOperator Operation() const;
    SymbolNumber Feature() const;
    ValueNumber Value() const;

private:
    const FlagDiacriticOperator operation;
    const SymbolNumber feature;
    const ValueNumber value;
};
/**
 * Alphabet of a transducer: the key table of symbol strings, the map of
 * flag diacritic operations, a string-to-symbol map and a few special
 * symbol numbers.
 *
 * Built from a FILE stream or a raw memory cursor together with the number
 * of symbols to read. All member functions are defined elsewhere.
 */
class TransducerAlphabet
{
public:
    TransducerAlphabet(FILE *f, SymbolNumber number_of_symbols);
    TransducerAlphabet(char ** raw, SymbolNumber number_of_symbols);

    void add_symbol(std::string & sym);
    void add_symbol(char * sym);

    // Accessors returning pointers to the contained tables.
    // NOTE(review): presumably these point at the members of this object
    // and so stay valid only as long as it does -- confirm.
    KeyTable * get_key_table();
    OperationMap * get_operation_map();
    StringSymbolMap * get_string_to_symbol();

    SymbolNumber get_state_size();
    SymbolNumber get_unknown() const;
    SymbolNumber get_identity() const;
    SymbolNumber get_orig_symbol_count() const;

    bool has_string(std::string const & s) const;
    bool is_flag(SymbolNumber symbol);

private:
    KeyTable kt;
    OperationMap operations;
    SymbolNumber unknown_symbol;
    SymbolNumber identity_symbol;
    SymbolNumber flag_state_size;
    SymbolNumber orig_symbol_count;
    StringSymbolMap string_to_symbol;

    // Parsing helpers.
    void process_symbol(char * line);
    void read(FILE * f, SymbolNumber number_of_symbols);
    void read(char ** raw, SymbolNumber number_of_symbols);
};
class LetterTrie;
typedef std::vector<LetterTrie*> LetterTrieVector;

/**
 * Trie node with one slot per possible unsigned char value
 * (UCHAR_MAX + 1 slots): `letters` holds child-node pointers and `symbols`
 * holds symbol numbers.
 *
 * NOTE(review): the class declares a destructor and stores raw child
 * pointers; if the destructor deletes the children, copying a LetterTrie
 * would be unsafe -- confirm in the implementation.
 */
class LetterTrie
{
public:
    // Every child pointer starts out null and every symbol slot starts out
    // as NO_SYMBOL.
    LetterTrie()
        : letters(UCHAR_MAX + 1, static_cast<LetterTrie*>(NULL)),
          symbols(UCHAR_MAX + 1, NO_SYMBOL)
    {}
    ~LetterTrie();

    // Defined elsewhere.
    void add_string(const char * p, SymbolNumber symbol_key);
    SymbolNumber find_key(char ** p);
    bool has_key_starting_with(const char c) const;

private:
    LetterTrieVector letters;
    SymbolVector symbols;
};
/**
 * Maps input text to symbol numbers using a LetterTrie plus a separate
 * vector of symbols (`ascii_symbols`).
 *
 * Constructed from a key table and the number of input symbols. All
 * member functions are defined elsewhere.
 */
class Encoder
{
public:
    Encoder(KeyTable * kt, SymbolNumber number_of_input_symbols);

    // Takes the text cursor by address (char **).
    SymbolNumber find_key(char ** p);

    // Register symbol string `s` under the number `s_num`.
    void read_input_symbol(const char * s, const int s_num);
    void read_input_symbol(std::string const & s, const int s_num);

private:
    LetterTrie letters;
    SymbolVector ascii_symbols;

    void read_input_symbols(KeyTable * kt,
                            SymbolNumber number_of_input_symbols);
};
// Vector of flag diacritic values.
// NOTE(review): presumably one entry per flag feature -- confirm against
// the code that builds and indexes it.
typedef std::vector<ValueNumber> FlagDiacriticState;
/**
 * One entry of the transition index table: an input symbol paired with a
 * table index.
 */
class TransitionIndex
{
public:
    // Combined size of the two fields (no padding included).
    static const size_t SIZE =
        sizeof(SymbolNumber) + sizeof(TransitionTableIndex);

    TransitionIndex(const SymbolNumber input,
                    const TransitionTableIndex first_transition)
        : input_symbol(input),
          first_transition_index(first_transition)
    {}

    // Defined elsewhere.
    TransitionTableIndex target() const;
    bool final() const;
    Weight final_weight() const;
    SymbolNumber get_input() const;

protected:
    SymbolNumber input_symbol;
    TransitionTableIndex first_transition_index;
};
/**
 * One transition: input symbol, output symbol, target index and weight.
 */
class Transition
{
public:
    // Combined size of the four fields (no padding included).
    static const size_t SIZE =
        2 * sizeof(SymbolNumber) + sizeof(TransitionTableIndex) + sizeof(Weight);

    Transition(const SymbolNumber input,
               const SymbolNumber output,
               const TransitionTableIndex target,
               const Weight w)
        : input_symbol(input),
          output_symbol(output),
          target_index(target),
          transition_weight(w)
    {}

    // Default: every field is set to its sentinel (NO_SYMBOL,
    // NO_TABLE_INDEX, INFINITE_WEIGHT).
    Transition()
        : input_symbol(NO_SYMBOL),
          output_symbol(NO_SYMBOL),
          target_index(NO_TABLE_INDEX),
          transition_weight(INFINITE_WEIGHT)
    {}

    // Defined elsewhere.
    TransitionTableIndex target() const;
    SymbolNumber get_output() const;
    SymbolNumber get_input() const;
    Weight get_weight() const;
    bool final() const;

protected:
    SymbolNumber input_symbol;
    SymbolNumber output_symbol;
    TransitionTableIndex target_index;
    Weight transition_weight;
};
/**
 * Transition index table backed by a raw character buffer.
 *
 * Constructed from a FILE stream or a raw memory cursor (char **) plus the
 * number of table entries; the accessors take an entry index `i`. All
 * member functions are defined elsewhere.
 *
 * Copying is disabled. The class holds a raw `char *` and declares its own
 * destructor, so the compiler-generated member-wise copy would leave two
 * objects pointing at the same buffer. (Presumably the destructor releases
 * that buffer, in which case a copy would end in a double free -- confirm
 * in the implementation file.)
 */
class IndexTable
{
private:
    char * indices;
    TransitionTableIndex size;

    void read(FILE * f,
              TransitionTableIndex number_of_table_entries);
    void read(char ** raw,
              TransitionTableIndex number_of_table_entries);
    void convert_to_big_endian();

    // Declared but intentionally left undefined: forbids copying (Rule of
    // Three, pre-C++11 idiom).
    IndexTable(const IndexTable &);
    IndexTable & operator=(const IndexTable &);

public:
    IndexTable(FILE * f,
               TransitionTableIndex number_of_table_entries);
    IndexTable(char ** raw,
               TransitionTableIndex number_of_table_entries);
    ~IndexTable();

    SymbolNumber input_symbol(TransitionTableIndex i) const;
    TransitionTableIndex target(TransitionTableIndex i) const;
    bool final(TransitionTableIndex i) const;
    Weight final_weight(TransitionTableIndex i) const;
};
/**
 * Transition (target) table backed by a raw character buffer.
 *
 * Constructed from a FILE stream or a raw memory cursor (char **) plus the
 * number of transitions; the accessors take an entry index `i`. All member
 * functions are defined elsewhere.
 *
 * Copying is disabled. The class holds a raw `char *` and declares its own
 * destructor, so the compiler-generated member-wise copy would leave two
 * objects pointing at the same buffer. (Presumably the destructor releases
 * that buffer, in which case a copy would end in a double free -- confirm
 * in the implementation file.)
 */
class TransitionTable
{
protected:
    char * transitions;
    TransitionTableIndex size;

    void read(FILE * f,
              TransitionTableIndex number_of_table_entries);
    void read(char ** raw,
              TransitionTableIndex number_of_table_entries);
    void convert_to_big_endian();

private:
    // Declared but intentionally left undefined: forbids copying (Rule of
    // Three, pre-C++11 idiom).
    TransitionTable(const TransitionTable &);
    TransitionTable & operator=(const TransitionTable &);

public:
    TransitionTable(FILE * f,
                    TransitionTableIndex transition_count);
    TransitionTable(char ** raw,
                    TransitionTableIndex transition_count);
    ~TransitionTable();

    SymbolNumber input_symbol(TransitionTableIndex i) const;
    SymbolNumber output_symbol(TransitionTableIndex i) const;
    TransitionTableIndex target(TransitionTableIndex i) const;
    Weight weight(TransitionTableIndex i) const;
    bool final(TransitionTableIndex i) const;
};
/**
 * Debug-output hook. The write to std::cerr sits behind a constant-false
 * condition, so calls currently print nothing; change the condition to
 * turn the output on.
 *
 * The argument is taken by const reference so that calling this no-op
 * never copies it (a by-value parameter would copy a std::string or
 * container on every call just to discard it).
 *
 * @param p any value that can be streamed with operator<< to std::cerr.
 */
template <class printable>
void debug_print(const printable & p)
{
    if (0) {
        std::cerr << p;
    }
}
}
#endif