00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00028 #ifndef SEQPP_PMARKOV_H
00029 #define SEQPP_PMARKOV_H
00030
00031 #include <seqpp/Markov.h>
00032 #include <seqpp/PhasedPMarkov.h>
00033 #include <seqpp/pmm_tree.h>
00034
00045 class PMarkov : public Markov
00046 {
00047 protected :
00048 pmm_forest * _p_f;
00049
00050 public:
00052
00060 PMarkov( Partition& p,
00061 const SequenceSet & seqset,
00062 const string& prior_alpha_file = string(),
00063 bool motif_prior = true,
00064 double penalty = 0.,
00065 const string & xmlfile = string() )
00066 : Markov( seqset.tell_alphabet_size(), seqset.tell_order() )
00067 {
00068 seqset.count_occurencies();
00069 if (!this->load_prior_alpha( prior_alpha_file ))
00070 this->default_prior_alpha();
00071 _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior, penalty );
00072 select( seqset.get_count(), true, seqset.get_translator(), xmlfile );
00073 }
00074
00076
00085 PMarkov( Partition& p,
00086 const Sequence & seq,
00087 const string& prior_alpha_file = string(),
00088 bool motif_prior = true,
00089 double penalty = 0.,
00090 const Translator & trans = Translator(),
00091 const string & xmlfile = string() )
00092 : Markov( seq.tell_alphabet_size(), seq.tell_order() )
00093 {
00094 seq.count_occurencies();
00095 if (!this->load_prior_alpha( prior_alpha_file ))
00096 this->default_prior_alpha();
00097 _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior, penalty );
00098 select( seq.get_count(), true, trans, xmlfile );
00099 }
00100
00101
00103
00114 PMarkov( Partition& p,
00115 unsigned long * count,
00116 short size, short order,
00117 const string& prior_alpha_file = string(),
00118 bool motif_prior = true,
00119 double penalty = 0.,
00120 const Translator & trans = Translator(),
00121 const string & xmlfile = string() )
00122 : Markov( size, order )
00123 {
00124 if (!this->load_prior_alpha( prior_alpha_file ))
00125 this->default_prior_alpha();
00126 _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior, penalty );
00127 select( count, false, trans, xmlfile );
00128 }
00129
00131
00142 PMarkov( Partition& part,
00143 const string & count_file,
00144 short size, short order,
00145 const string& prior_alpha_file = string(),
00146 bool motif_prior = true,
00147 double penalty = 0.,
00148 const Translator & trans = Translator(),
00149 const string & xmlfile = string() )
00150 : Markov( size, order )
00151 {
00152 unsigned long * count = new unsigned long[_nPi];
00153 if (! this->load_count( count_file, &count )){
00154 cerr<<"Bad count file"<<endl;
00155 exit(1);
00156 }
00157 if (!this->load_prior_alpha( prior_alpha_file ))
00158 this->default_prior_alpha();
00159 _p_f = new pmm_forest( part, _size, _order, _prior_alpha[0], motif_prior, penalty );
00160 select( count, false, trans, xmlfile );
00161
00162 delete [] count;
00163 }
00164
00166
00175 PMarkov( Partition& part,
00176 short size,
00177 short order,
00178 const string& prior_alpha_file = string(),
00179 bool motif_prior = true,
00180 double penalty = 0.,
00181 bool alloc = true )
00182 : Markov( size, order, alloc )
00183 {
00184 if (!this->load_prior_alpha( prior_alpha_file ))
00185 this->default_prior_alpha();
00186 _p_f = new pmm_forest( part, _size, _order, _prior_alpha[0], motif_prior, penalty );
00187 }
00188
00189
00191
00197 void select( unsigned long * count, bool decal_required,
00198 const Translator & trans = Translator(),
00199 const string & xmlfile = string() )
00200 {
00201 _nb_param = 0;
00202 long decal = 0;
00203 if (decal_required)
00204 decal = _jump;
00205
00206 #ifdef HAVE_LIBXML2
00207 if (xmlfile.size() != 0){
00208 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00209 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00210 xmlDocSetRootElement(doc,root_node);
00211
00212 pmm_tree p_t = _p_f->select( count+decal );
00213 p_t.tree_to_matrix( _Pi );
00214 _nb_param += p_t.nb_leaves() * (_size-1);
00215
00216 std::stringstream treenamestream;
00217 p_t.save( trans, root_node, treenamestream.str() );
00218 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00219 xmlFreeDoc(doc);
00220 }
00221 else{
00222
00223 pmm_tree p_t = _p_f->select( count+decal );
00224 p_t.tree_to_matrix( _Pi );
00225 _nb_param += p_t.nb_leaves() * (_size-1);
00226
00227 }
00228 #endif
00229 #ifndef HAVE_LIBXML2
00230 if (xmlfile.size() != 0){
00231 cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00232 }
00233
00234 pmm_tree p_t = _p_f->select( count+decal );
00235 p_t.tree_to_matrix( _Pi );
00236 _nb_param += p_t.nb_leaves() * (_size-1);
00237
00238 #endif
00239 }
00240
00241
00243
00249 template <class TSeq1, class TSeq2>
00250 double mean_post_log_likelihood( const TSeq1& tseq_train, const TSeq2& tseq_eval,
00251 short initial_phase_train = 0, short initial_phase_eval = 0 )
00252 {
00253 tseq_train.count_occurencies();
00254 tseq_eval.count_occurencies();
00255 return this->mean_post_log_likelihood( tseq_train.get_count(), true,
00256 tseq_eval.get_count(), true );
00257 }
00258
00260
00264 template <class TSeq>
00265 double mean_post_log_likelihood( const TSeq& tseq_eval,
00266 short initial_phase_eval = 0 )
00267 {
00268 tseq_eval.count_occurencies();
00269 return this->mean_post_log_likelihood( tseq_eval.get_count(), true );
00270 }
00271
00272
00274
00280 double mean_post_log_likelihood( unsigned long * count_train, bool decal_required_t,
00281 unsigned long * count_eval, bool decal_required_e )
00282 {
00283 if (decal_required_t)
00284 _p_f->extended_tree( count_train+_jump );
00285 else
00286 _p_f->extended_tree( count_train );
00287
00288 if (decal_required_e)
00289 return _p_f->mean_post_log_likelihood( count_eval+_jump );
00290 else
00291 return _p_f->mean_post_log_likelihood( count_eval );
00292 }
00293
00295
00299 double mean_post_log_likelihood( unsigned long * count_eval, bool decal_required_e )
00300 {
00301 if (decal_required_e)
00302 return _p_f->mean_post_log_likelihood( count_eval+_jump );
00303 else
00304 return _p_f->mean_post_log_likelihood( count_eval );
00305 }
00306
00308 double mean_post_log_likelihood(){
00309 _p_f->extended_tree();
00310 return _p_f->mean_post_log_likelihood( );
00311 }
00312
00313
00315
00318 template <class TSeq>
00319 double post_log_likelihood( const TSeq& tseq_eval )
00320 {
00321 if ( tseq_eval.tell_order() != _order ){
00322 cerr << "Order incompatibility beetween sequence and model " << endl;
00323 exit(-1);
00324 }
00325
00326
00327 tseq_eval.count_occurencies();
00328
00329 return post_log_likelihood( tseq_eval.get_count(), true );
00330 }
00331
00333
00337 double post_log_likelihood( unsigned long * count_eval, bool decal_required_e )
00338 {
00339 if (decal_required_e)
00340 return _p_f->post_log_likelihood( count_eval+_jump );
00341 }
00342
00343
00344
00346
00353 void draw( unsigned long * count, bool decal_required,
00354 gsl_rng * r,
00355 const Translator & trans = Translator(),
00356 const string & xmlfile = string() )
00357 {
00358 _nb_param = 0;
00359 #ifdef HAVE_LIBXML2
00360 if (xmlfile.size() != 0){
00361 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00362 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00363 xmlDocSetRootElement(doc,root_node);
00364 if (decal_required)
00365 _p_f->extended_tree( count+_jump );
00366 else
00367 _p_f->extended_tree( count );
00368
00369 pmm_tree p_t = _p_f->draw(r);
00370 p_t.tree_to_matrix( _Pi );
00371 _nb_param += p_t.nb_leaves() * (_size-1);
00372
00373 std::stringstream treenamestream;
00374 p_t.save( trans, root_node, treenamestream.str() );
00375 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00376 xmlFreeDoc(doc);
00377 }
00378 else{
00379 if (decal_required)
00380 _p_f->extended_tree( count+_jump );
00381 else
00382 _p_f->extended_tree( count );
00383
00384 pmm_tree p_t = _p_f->draw(r);
00385 p_t.tree_to_matrix( _Pi );
00386 _nb_param += p_t.nb_leaves() * (_size-1);
00387
00388 }
00389 #endif
00390 #ifndef HAVE_LIBXML2
00391 if (xmlfile.size() != 0){
00392 cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00393 }
00394 if (decal_required)
00395 _p_f->extended_tree( count+_jump );
00396 else
00397 _p_f->extended_tree( count );
00398
00399 pmm_tree p_t = _p_f->draw(r);
00400 p_t.tree_to_matrix( _Pi );
00401 _nb_param += p_t.nb_leaves() * (_size-1);
00402
00403 #endif
00404
00405 if (_Mu != NULL)
00406 this->compute_stat_law( true );
00407 }
00408
00410
00415 void draw( gsl_rng * r,
00416 const Translator & trans,
00417 const string & xmlfile = string() )
00418 {
00419 _nb_param = 0;
00420 #ifdef HAVE_LIBXML2
00421 if (xmlfile.size() != 0){
00422 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00423 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00424 xmlDocSetRootElement(doc,root_node);
00425
00426 pmm_tree p_t = _p_f->draw(r);
00427 p_t.tree_to_matrix( _Pi );
00428 _nb_param += p_t.nb_leaves() * (_size-1);
00429
00430 std::stringstream treenamestream;
00431 p_t.save( trans, root_node, treenamestream.str() );
00432 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00433 xmlFreeDoc(doc);
00434 }
00435 else{
00436
00437 pmm_tree p_t = _p_f->draw(r);
00438 p_t.tree_to_matrix( _Pi );
00439 _nb_param += p_t.nb_leaves() * (_size-1);
00440
00441 }
00442 #endif
00443 #ifndef HAVE_LIBXML2
00444 if (xmlfile.size() != 0){
00445 cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00446 }
00447
00448 pmm_tree p_t = _p_f->draw(r);
00449 p_t.tree_to_matrix( _Pi );
00450 _nb_param += p_t.nb_leaves() * (_size-1);
00451
00452 #endif
00453
00454 if (_Mu != NULL)
00455 this->compute_stat_law( true );
00456 }
00457
00459 void info_nb_leaves() const{
00460 cout<<_p_f->current_pmm_tree().nb_leaves()<<" leaves"
00461 <<endl;
00462 }
00463
00464 void set_penalty( double penalty ){
00465 _p_f->set_penalty( penalty);
00466 }
00467
00468 void default_prior_alpha(){
00469 vector<double> tmp;
00470 tmp.assign(_size, 1.);
00471 _prior_alpha.assign(1, tmp);
00472 }
00473
00474 ~PMarkov(){
00475
00476 }
00477 };
00478 #endif