libfreecontact 1.0.21
Loading...
Searching...
No Matches
freecontact.h
Go to the documentation of this file.
1/* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment
2* Copyright (C) 2012-2013 Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany
3*
4* This program is free software: you can redistribute it and/or modify
5* it under the terms of the GNU General Public License as published by
6* the Free Software Foundation, either version 3 of the License, or
7* (at your option) any later version.
8*
9* This program is distributed in the hope that it will be useful,
10* but WITHOUT ANY WARRANTY; without even the implied warranty of
11* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12* GNU General Public License for more details.
13*
14* You should have received a copy of the GNU General Public License
15* along with this program. If not, see <http://www.gnu.org/licenses/>.
16*/
17#ifndef LIBFREECONTACT_H
18#define LIBFREECONTACT_H
19
20#include <errno.h>
21#include <map>
22#include <math.h>
23#include <stdexcept>
24#include <stdint.h>
25#include <string.h>
26#include <vector>
27#ifdef __SSE2__
28#include <x86intrin.h>
29#endif // __SSE2__
30
31#define LIBFREEC_API __attribute__ ((visibility ("default")))
32#define LIBFREEC_LOCAL __attribute__ ((visibility ("hidden")))
33
34namespace freecontact {
35
36// http://www.fao.org/docrep/004/Y2775E/y2775e0e.htm
37// Use 20 for gap.
38static const uint8_t aamap_gapidx = 20;
39static const uint8_t aamap[] = {
40 // A B C D E F G H I J K L M N
41 21, 0, 3, 4, 3, 6, 13, 7, 8, 9, 21, 11, 10, 12, 2,
42 // O P Q R S T U V W X Y Z
43 21, 14, 5, 1, 15, 16, 21, 19, 17, 21, 18, 6
44};
45static const uint8_t mapaa[] = {
46 //0 1 2 3 4 5 6 7 8 9
47 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
48 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
49 '-', 'X'
50};
51
53class LIBFREEC_API alilen_error : public std::runtime_error {
54 public:
55 alilen_error(const std::string& __arg) : std::runtime_error(__arg){}
56};
57
59class LIBFREEC_API icme_timeout_error : public std::runtime_error {
60 public:
61 icme_timeout_error(const std::string& __arg) : std::runtime_error(__arg){}
62};
63
64#ifndef __SSE2__
65typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
66#endif
67
69class LIBFREEC_API ali_t : public std::vector<__m128i>
70{
71 private:
72 #pragma GCC visibility push(hidden)
73 #pragma GCC visibility pop
74 public:
75 uint32_t seqcnt;
76 uint16_t alilen; // aligned sequence length, max 65535, reasonable
77 uint16_t alilen16; // size in 16-bytes
78 uint16_t alilenpad; // 16 * alilen16
79
80 ali_t( const uint16_t __alilen = 0 ) : std::vector<__m128i>(), seqcnt(0), alilen(__alilen), alilen16(( alilen-1 )/16 + 1), alilenpad( 16 * alilen16 )
81 {
82 reserve(1024*alilen16);
83 }
84 virtual ~ali_t(){}
85
86 inline uint8_t& operator()(uint32_t __k, uint16_t __ai)
87 {
88 return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
89 }
90 inline const uint8_t& operator()(uint32_t __k, uint16_t __ai) const
91 {
92 return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
93 }
94
96 static std::vector<uint8_t>
97 read_a_seq( const std::string& __l )
98 {
99 std::vector<uint8_t> ret; ret.reserve(__l.length());
100 for( std::string::const_iterator l_b = __l.begin(), l_e = __l.end(); l_b != l_e; ++l_b )
101 {
102 const std::string::value_type c = *l_b;
103 if( isalpha(c) )
104 ret.push_back( aamap[c & 0x1F] );
105 else if( c == '-' )
106 ret.push_back( 20 );
107 else break;
108 }
109 return ret;
110 }
111
113 ali_t& push(const std::vector<uint8_t>& __al);// throw (alilen_error);
114
116 inline ali_t& push(const std::string& __l) //throw (alilen_error)
117 {
118 return push(read_a_seq(__l));
119 }
120};
121
122
126 uint16_t i, j;
128 float score;
129 contact_t( uint16_t __i = 0, uint16_t __j = 0, float __score = 0 ) : i(__i), j(__j), score(__score) {}
130
131 inline bool operator<( const contact_t& __b ) const { return score < __b.score; } // to facilitate sorting by score
132 inline bool operator>( const contact_t& __b ) const { return score > __b.score; } // to facilitate sorting by score
133};
134
136struct parset_t {
137 double clustpc, density, gapth; uint16_t mincontsep;
139 bool cov20, apply_gapth; double rho;
140
141 parset_t( double __clustpc = 0, double __density = 0, double __gapth = 0, uint16_t __mincontsep = 0,
142 double __pseudocnt = 0, double __pscnt_weight = 0, bool __estimate_ivcov = 0, double __shrink_lambda = 0,
143 bool __cov20 = 0, bool __apply_gapth = 0, double __rho = 0
144 ) :
145 clustpc(__clustpc), density(__density), gapth(__gapth), mincontsep(__mincontsep),
146 pseudocnt(__pseudocnt), pscnt_weight(__pscnt_weight), estimate_ivcov(__estimate_ivcov), shrink_lambda(__shrink_lambda),
147 cov20(__cov20), apply_gapth(__apply_gapth), rho(__rho)
148 {}
149};
150
151const parset_t // clust conts gapt minc pseud pscw estim shrin cov20 agapth rho
152 //ps_evfold(0.70, 0.0, 1.0, 1, 0.0, 0.5, false, 0.0, true, false, -1 ), // original Matlab that expects gap pre-filtering
153 ps_evfold( 0.70, 0.0, 0.9, 1, 0.0, 0.5, false, 0.0, true, true, -1 ),
154 ps_psicov( 0.62, 0.03, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, -1 ), // as published
155 ps_psicov_sd( 0.62, 0.0, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, 0.001 ); // PSICOV sensible default (sd) for most cases
156
158
161 public:
162 typedef std::map<std::string, std::vector<contact_t> > cont_res_t;
163 typedef double freq_t;
164 typedef float pairfreq_t;
165 typedef std::vector<freq_t> freq_vec_t;
166 typedef std::map<std::string, double> time_res_t;
167 public:
168 bool dbg;
169 predictor(bool __dbg = false) : dbg(__dbg) {}
170 virtual ~predictor(){}
171
173
181 void get_seq_weights(
182 freq_vec_t &__aliw, double &__wtot,
183 const ali_t& __ali, double __clustpc,
184 bool __veczw = true, int __num_threads = 0
185 );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
186
188
214 cont_res_t run(const ali_t& __ali, const freq_vec_t &__aliw, const double __wtot,
215 double __density, double __gapth, uint16_t __mincontsep,
216 double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
217 bool __cov20, bool __apply_gapth, double __rho,
218 int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
219 );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
220
222 cont_res_t run(const ali_t& __ali, double __clustpc,
223 double __density, double __gapth, uint16_t __mincontsep,
224 double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
225 bool __cov20, bool __apply_gapth, double __rho,
226 bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
227 );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
228
230 inline
231 cont_res_t run(const ali_t& __ali,
232 const parset_t& __parset,
233 bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
234 )// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
235 {
236 return run(__ali, __parset.clustpc,
237 __parset.density, __parset.gapth, __parset.mincontsep,
238 __parset.pseudocnt, __parset.pscnt_weight, __parset.estimate_ivcov, __parset.shrink_lambda,
239 __parset.cov20, __parset.apply_gapth, __parset.rho,
240 __veczw, __num_threads, __icme_timeout, __timing);
241 }
242};
243
244} // namespace freecontact
245
246#endif // LIBFREECONTACT_H
247// vim:et:ts=4:ai:
Protein sequence alignment.
Definition freecontact.h:70
ali_t & push(const std::string &__l)
Push a sequence to the alignment.
const uint8_t & operator()(uint32_t __k, uint16_t __ai) const
Definition freecontact.h:90
static std::vector< uint8_t > read_a_seq(const std::string &__l)
Convert alignment string to amino acid codes with aamap.
Definition freecontact.h:97
ali_t(const uint16_t __alilen=0)
Definition freecontact.h:80
uint8_t & operator()(uint32_t __k, uint16_t __ai)
Definition freecontact.h:86
Alignment length exception.
Definition freecontact.h:53
alilen_error(const std::string &__arg)
Definition freecontact.h:55
Inverse covariance matrix estimation timeout exception.
Definition freecontact.h:59
icme_timeout_error(const std::string &__arg)
Definition freecontact.h:61
Protein residue contact predictor.
std::map< std::string, std::vector< contact_t > > cont_res_t
std::map< std::string, double > time_res_t
cont_res_t run(const ali_t &__ali, const parset_t &__parset, bool __veczw=true, int __num_threads=0, time_t __icme_timeout=1800, time_res_t *__timing=NULL)
Predict residue contacts.
predictor(bool __dbg=false)
std::vector< freq_t > freq_vec_t
#define LIBFREEC_API
Definition freecontact.h:31
const parset_t ps_psicov_sd(0.62, 0.0, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, 0.001)
static const uint8_t aamap_gapidx
Definition freecontact.h:38
long long __m128i __attribute__((__vector_size__(16), __may_alias__))
Definition freecontact.h:65
const parset_t ps_evfold(0.70, 0.0, 0.9, 1, 0.0, 0.5, false, 0.0, true, true, -1)
const parset_t ps_psicov(0.62, 0.03, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, -1)
static const uint8_t aamap[]
Definition freecontact.h:39
static const uint8_t mapaa[]
Definition freecontact.h:45
Contact prediction.
contact_t(uint16_t __i=0, uint16_t __j=0, float __score=0)
bool operator<(const contact_t &__b) const
float score
Contact score.
uint16_t i
Residue number, 0-based(!).
bool operator>(const contact_t &__b) const
Parameter set structure for predictor::run().
parset_t(double __clustpc=0, double __density=0, double __gapth=0, uint16_t __mincontsep=0, double __pseudocnt=0, double __pscnt_weight=0, bool __estimate_ivcov=0, double __shrink_lambda=0, bool __cov20=0, bool __apply_gapth=0, double __rho=0)