librostlab 1.0.20
Loading...
Searching...
No Matches
readFasta.h
Go to the documentation of this file.
1/*
2 Copyright (C) 2011 Laszlo Kajan, Technical University of Munich, Germany
3
4 This file is part of librostlab.
5
6 librostlab is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef ROSTLAB_READFASTA
20#define ROSTLAB_READFASTA 1
21
22#include <boost/regex.hpp>
23#include <iostream>
24#include <fstream>
26
27namespace bo = boost;
28
29namespace rostlab {
30namespace bio {
31
// Format tag types. A tag carries no data; it is only used as the template
// argument of seq<> so that format-specific overloads can be selected.
namespace fmt {

// Tag for the FASTA sequence format.
class fasta
{
};

} // namespace fmt
35
/// Sequence record holding a description, a display id and the sequence string.
///
/// \tparam _FmtT  format tag type; it is not used by the class itself and only
///                distinguishes instantiations (e.g. for overload selection).
template<typename _FmtT>
class seq {
  private:
    std::string _desc;        // description text
    std::string _display_id;  // display identifier
    std::string _seqstr;      // sequence string
  public:
    /// Create an empty record: all three strings are empty.
    seq() {}

    /// Create a record from its three parts (each argument is copied).
    /// \param __desc        description text
    /// \param __display_id  display identifier
    /// \param __seqstr      sequence string
    seq( const std::string& __desc, const std::string& __display_id, const std::string& __seqstr )
      : _desc(__desc), _display_id(__display_id), _seqstr(__seqstr) {}

    virtual ~seq() {}

    /// Mutable access to the sequence string.
    std::string& seqstr() { return _seqstr; }
    /// Read-only access to the sequence string; usable on const objects.
    const std::string& seqstr() const { return _seqstr; }

    /// Read-only access to the description text.
    const std::string& desc() const { return _desc; }
    /// Read-only access to the display identifier.
    const std::string& display_id() const { return _display_id; }
};
49
50/*template<> // could specialize it...
51class seq<bio::fmt::fasta>
52{
53 private:
54 public:
55};*/
56
57/*template<typename _FmtT>
58std::istream& operator>>( std::istream& __is, bio::seq<_FmtT>& __n )
59{
60 return __is;
61}*/
62
63inline std::istream& operator>>( std::istream& __is, bio::seq<bio::fmt::fasta>& __seq )
64{
65 // based on Bio/SeqIO/fasta.pm
66 std::string rec; rec.reserve(1024);
67 while( __is.peek() != std::istream::traits_type::eof() )
68 {
69 if(rec.capacity() == rec.size()) rec.reserve(rec.capacity() * 2);
70 if( rec.size() && __is.peek() == '>' && *rec.rbegin() == '\n' ) break;
71 else rec += __is.get();
72 }
73
74 if( !rec.size() || *rec.begin() != '>' ) throw runtime_error( std::string("FASTA syntax error in record '") + rec + "': no leading '>'" );
75
76 rec = bo::regex_replace( rec, bo::regex("^>"), "" ); // $entry =~ s/^>//;
77
78 bo::sregex_token_iterator i(rec.begin(), rec.end(), bo::regex("\n"), -1); // split(/\n/,$entry,2);
79
80 if( i == boost::sregex_token_iterator() ) throw runtime_error( std::string("FASTA syntax error in record '") + rec + "': only one line" );
81
82 std::string top = *i++;
83 std::string sequence( i->first, static_cast<std::string::const_iterator>( rec.end() ) );
84
85 sequence = bo::regex_replace( sequence, bo::regex(">"), "" ); // $sequence =~ s/>//g;
86
87 bo::match_results<std::string::const_iterator> what;
88 std::string id, fulldesc;
89 if( bo::regex_search( top, what, bo::regex("^[[:space:]]*([^[:space:]]+)[:space:]*(.*)") ) )
90 { id = std::string( what[1].first, what[1].second ); fulldesc = std::string( what[2].first, what[2].second ); }
91
92 if( id.empty() ) id = fulldesc;
93
94 sequence = bo::regex_replace( sequence, bo::regex("[ \t\n\r]"), "" );
95
96 // alphabet? would be good to have this
97
99
100 return __is;
101}
102
103}; // namespace bio
104}; // namespace rostlab
105
106#endif /* ROSTLAB_READFASTA */
107// vim:et:ts=2:ai:
std::string & seqstr()
Definition readFasta.h:47
virtual ~seq()
Definition readFasta.h:45
seq(const std::string &__desc, const std::string &__display_id, const std::string &__seqstr)
Definition readFasta.h:44
std::istream & operator>>(std::istream &__is, bio::seq< bio::fmt::fasta > &__seq)
Definition readFasta.h:63