Needle
An application for fast and efficient searches of NGS data.
Loading...
Searching...
No Matches
shared.h
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/needle/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
8#pragma once
9
10#include <robin_hood.h>
11
12#include <seqan3/alphabet/nucleotide/dna4.hpp>
13#include <seqan3/io/sequence_file/input.hpp>
14#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
15#include <seqan3/search/kmer_index/shape.hpp>
16#include <seqan3/search/views/minimiser_hash.hpp>
17
/*!\brief Shifts a 64-bit seed to the right so that only `2 * kmer_size` of its bits remain.
 * \param kmer_size Size of the k-mer; the formula reserves two bits per k-mer position.
 * \param seed      The full-width seed to adjust.
 * \returns `seed >> (64 - 2 * kmer_size)` for `kmer_size` in [1, 32];
 *          `0` for `kmer_size == 0`; `seed` unchanged for `kmer_size >= 32`.
 *
 * \details Shifting a 64-bit value by 64 or more bits is undefined behaviour. Without the guards,
 *          `kmer_size == 0` would shift by exactly 64, and `kmer_size > 32` would make the unsigned
 *          shift count wrap around to a huge value. Both cases are therefore handled explicitly.
 */
inline constexpr static uint64_t adjust_seed(uint8_t const kmer_size, uint64_t const seed = 0x8F3F73B5CF1C9ADEULL) noexcept
{
    // No k-mer bits at all: every seed bit is shifted out.
    if (kmer_size == 0u)
        return 0u;

    // 32 positions already occupy all 64 bits; larger sizes are clamped to the full seed.
    if (kmer_size >= 32u)
        return seed;

    return seed >> (64u - 2u * kmer_size);
}
22
25{
26 std::filesystem::path path_out{"./"};
27 uint8_t threads{1};
28};
29
32{
33 uint8_t k{20};
34 seqan3::seed s{0x8F3F73B5CF1C9ADEULL};
35 seqan3::shape shape = seqan3::ungapped{k};
36 seqan3::window_size w_size{60};
37};
38
41{
42 bool compressed = false;
43 std::vector<uint16_t> expression_thresholds{}; // Expression levels which should be created
44 uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
45 bool samplewise{false};
46
// Serialisation hook: passes each value to `archive` in a fixed order:
// k, w_size.get(), s.get(), shape, compressed, expression_thresholds, samplewise.
// load() has to consume the values in exactly the same order.
// NOTE(review): number_expression_thresholds is not passed to the archive here. The listing's own
// line numbers jump from 54 to 56 at that position, so a line may be missing from this view —
// confirm against the original header before relying on this.
47 template<class Archive>
48 void save(Archive & archive) const
49 {
50 archive(k);
51 archive(w_size.get());
52 archive(s.get());
53 archive(shape);
54 archive(compressed);
56 archive(expression_thresholds);
57 archive(samplewise);
58 }
59
// Deserialisation hook: hands each member to `archive` in a fixed order:
// k, w_size.get(), s.get(), shape, compressed, expression_thresholds, samplewise.
// The order must match the one used by save().
// NOTE(review): number_expression_thresholds is not passed to the archive here. The listing's own
// line numbers jump from 67 to 69 at that position, so a line may be missing from this view —
// confirm against the original header before relying on this.
60 template<class Archive>
61 void load(Archive & archive)
62 {
63 archive(k);
64 archive(w_size.get());
65 archive(s.get());
66 archive(shape);
67 archive(compressed);
69 archive(expression_thresholds);
70 archive(samplewise);
71 }
72};
73
78static void load_args(estimate_ibf_arguments & args, std::filesystem::path ipath)
79{
80 std::ifstream is{ipath, std::ios::binary};
81 cereal::BinaryInputArchive iarchive{is};
82 iarchive(args);
83}
84
89static void store_args(estimate_ibf_arguments const & args, std::filesystem::path opath)
90{
91 std::ofstream os{opath, std::ios::binary};
92 cereal::BinaryOutputArchive oarchive{os};
93 oarchive(args);
94}
95
97struct my_traits : seqan3::sequence_file_input_default_traits_dna
98{
99 using sequence_alphabet = seqan3::dna4;
100 //TODO: Should I use a bitcompressed_vector to save memory but with the disadvantage of losing speed?
101 //template <typename alph>
102 //using sequence_container = seqan3::bitcompressed_vector<alph>;
103};
104
109template <class IBFType>
110void load_ibf(IBFType & ibf, std::filesystem::path ipath)
111{
112 std::ifstream is{ipath, std::ios::binary};
113 cereal::BinaryInputArchive iarchive{is};
114 iarchive(ibf);
115}
116
121template <class IBFType>
122void store_ibf(IBFType const & ibf,
123 std::filesystem::path opath)
124{
125 std::ofstream os{opath, std::ios::binary};
126 cereal::BinaryOutputArchive oarchive{os};
127 oarchive(ibf);
128}
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition ibf.cpp:956
void load_ibf(IBFType &ibf, std::filesystem::path ipath)
Function that loads compressed and uncompressed IBFs.
Definition shared.h:110
void store_ibf(IBFType const &ibf, std::filesystem::path opath)
Function that stores compressed and uncompressed IBFs.
Definition shared.h:122
arguments used for all tools
Definition shared.h:25
uint8_t threads
Definition shared.h:27
std::filesystem::path path_out
Definition shared.h:26
arguments used for estimate, ibf, ibfmin
Definition shared.h:41
void load(Archive &archive)
Definition shared.h:61
std::vector< uint16_t > expression_thresholds
Definition shared.h:43
uint8_t number_expression_thresholds
Definition shared.h:44
bool compressed
Definition shared.h:42
bool samplewise
Definition shared.h:45
void save(Archive &archive) const
Definition shared.h:48
arguments used for estimate, ibf, minimiser
Definition shared.h:32
uint8_t k
Definition shared.h:33
seqan3::shape shape
Definition shared.h:35
seqan3::window_size w_size
Definition shared.h:36
seqan3::seed s
Definition shared.h:34
Use dna4 instead of default dna5.
Definition shared.h:98
seqan3::dna4 sequence_alphabet
Definition shared.h:99