45#include "EST_String.h"
49#include "EST_multistats.h"
54const EST_String PredictionSuffixTree_oov(
"_OOV_");
60EST_PredictionSuffixTree_tree_node::~EST_PredictionSuffixTree_tree_node()
65EST_PredictionSuffixTree_tree_node::print_freqs(
ostream &
os)
80 os << get_path() <<
" " << s <<
" : " << freq <<
endl;
86 for (t.
begin(nodes); t; t++)
92EST_PredictionSuffixTree_tree_node::print_probs(
ostream &
os)
101 os << get_path() <<
" :";
105 os <<
" " << s <<
" " << prob;
112 for (t.
begin(nodes); t; t++)
118EST_PredictionSuffixTree_tree_node::most_probable(
double *prob)
const
124EST_PredictionSuffixTree::EST_PredictionSuffixTree(
void)
133EST_PredictionSuffixTree::init(
const int order)
138 nodes->set_level(order-1);
142EST_PredictionSuffixTree::~EST_PredictionSuffixTree()
149EST_PredictionSuffixTree::clear(
void)
159 const int index)
const
163 if (
words.n() == index+1){
164 return node->prob_dist();
174 return PSTnullProbDistribution;
176 return p_prob_dist(next,
words,index+1);
194 if (
words.n()+index < p_order)
195 cerr <<
"EST_PredictionSuffixTree: accumulating window is too small"
200 p_accumulate(nodes,
words,count,index);
211 if (
words.n() == index+1)
213 if (
node->prob_dist().samples() == 0)
214 node->set_state(num_states++);
225 if (
node->get_path() ==
"")
226 next->set_path(
words(index));
229 next->set_path(
node->get_path()+
" "+
words(index));
231 next->set_level(
node->get_level()-1);
232 node->nodes.set_val(
words(index),est_val(next));
234 p_accumulate(next,
words,count,index+1);
243 double d1 =
pg.frequency(
words(order()-1));
244 double d2 = pd->frequency(
words(order()-1));
254 return (
double)
pg.frequency(
words(order()-1)) /
255 pd->frequency(
words(order()-1));
263 return ppredict(nodes,
words,&p,&state);
270 return ppredict(nodes,
words,p,&state);
276 return ppredict(nodes,
words,p,state);
282 double *p,
int *state,
283 const int index)
const
286 if (
words.n() == index+1)
288 *state =
node->get_state();
289 return node->most_probable(p);
300 return PredictionSuffixTree_oov;
303 return ppredict(next,
words,p,state,index+1);
308EST_PredictionSuffixTree::print_freqs(
ostream &
os)
312 os <<
"EST_PredictionSuffixTree order=" << p_order <<
endl;
313 nodes->print_freqs(
os);
318EST_PredictionSuffixTree::print_probs(
ostream &
os)
322 os <<
"EST_PredictionSuffixTree " << p_order <<
endl;
323 nodes->print_probs(
os);
328EST_PredictionSuffixTree::save(
const EST_String filename,
const EST_PredictionSuffixTree::EST_filetype type)
344EST_PredictionSuffixTree::load(
const EST_String filename)
351 if (
ts.open(filename) != 0)
353 cerr <<
"EST_PredictionSuffixTree: failed to open \"" << filename <<
"\" for reading\n";
356 ts.set_SingleCharSymbols(
":");
358 if (
ts.get() !=
"EST_PredictionSuffixTree")
360 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" not an EST_PredictionSuffixTree\n";
364 order =
atoi(
ts.get().string());
365 if ((order < 1) || (order > 10))
367 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" has bad order\n";
373 for (i=0; i<p_order; i++)
379 window[p_order-1] =
ts.get().string();
382 cerr <<
"EST_PredictionSuffixTree: file \"" << filename <<
"\" missed parsed line ";
383 cerr <<
ts.linenum() <<
" near EST_PredictionSuffixTree\n";
384 for (i=0; i < order; i++)
389 freq =
atoi(
ts.get().string());
397EST_PredictionSuffixTree::build(
const EST_String filename,
407 else if (
ts.open(filename) == -1)
412 for (i=0; i<p_order-1; i++)
423 window[p_order-1] =
ts.get().string();
448 for (i=0; i<p_order; i++)
462EST_PredictionSuffixTree::test(
const EST_String filename)
474 else if (
ts.open(filename) == -1)
480 for (p.
begin(nodes->nodes); p; p++)
486 for (i=0; i<p_order; i++)
494 window[p_order-1] =
ts.get().string();
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index
EST_Litem * item_start() const
Used for iterating through members of the distribution.
void item_prob(EST_Litem *idx, EST_String &s, double &prob) const
During iteration returns name and probability given index.
const EST_String & most_probable(double *prob=NULL) const
Return the most probable member of the distribution.
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences.
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
void begin(const Container &over)
Set the iterator ready to run over this container.