tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
nodes.h
/*
    Copyright (c) 2016, Taiga Nomi
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include <vector>
#include <tuple>
#include <unordered_map>
#include <cereal/types/utility.hpp>
#include <cereal/types/tuple.hpp>

#include "tiny_dnn/util/util.h"
#include "tiny_dnn/layers/layer.h"
#include "tiny_dnn/optimizers/optimizer.h"

namespace cereal {

// serialize a list of layers polymorphically, via layer::save_layer
template <typename Archive>
void save(Archive & ar, const std::vector<tiny_dnn::layerptr_t>& v) {
  ar(cereal::make_size_tag((cereal::size_type)v.size()));
  for (auto n : v) {
    tiny_dnn::layer::save_layer(ar, *n);
  }
}

// deserialize layers written by the overload above, reconstructing each
// concrete layer type through layer::load_layer
template <typename Archive>
void load(Archive & ar, std::vector<std::shared_ptr<tiny_dnn::layer>>& v) {
  cereal::size_type size;
  ar(cereal::make_size_tag(size));

  for (size_t i = 0; i < size; i++) {
    v.emplace_back(tiny_dnn::layer::load_layer(ar));
  }
}

}  // namespace cereal

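// Usage sketch for the overloads above (a sketch, assuming the caller includes
// a concrete cereal archive header and `v` holds layers known to tiny_dnn's
// deserialization helper; the overloads are found by argument-dependent lookup):
//
//   std::ofstream ofs("net.bin", std::ios::binary);
//   cereal::BinaryOutputArchive oa(ofs);
//   oa(v);  // dispatches to cereal::save() above; each layer is written polymorphically
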
namespace tiny_dnn {

/** basic class of various network types (sequential, multi-in/multi-out).
 **/
class nodes {
 public:
  typedef std::vector<layerptr_t>::iterator iterator;
  typedef std::vector<layerptr_t>::const_iterator const_iterator;

  /**
   * propagate gradient
   **/
  virtual void backward(const std::vector<tensor_t>& first) = 0;

  virtual std::vector<tensor_t> forward(const std::vector<tensor_t>& first) = 0;  // NOLINT

  /**
   * update weights and clear all gradients
   **/
  virtual void update_weights(optimizer *opt, int batch_size) {
    for (auto l : nodes_) {
      l->update_weight(opt, batch_size);
    }
  }

  /**
   * setup all weights, must be called before forward/backward
   **/
  virtual void setup(bool reset_weight) {
    for (auto l : nodes_) {
      l->setup(reset_weight);
    }
  }

  void clear_grads() {
    for (auto l : nodes_) {
      l->clear_grads();
    }
  }

  size_t size() const { return nodes_.size(); }
  iterator begin() { return nodes_.begin(); }
  iterator end() { return nodes_.end(); }
  const_iterator begin() const { return nodes_.begin(); }
  const_iterator end() const { return nodes_.end(); }
  layer* operator[] (size_t index) { return nodes_[index]; }
  const layer* operator[] (size_t index) const { return nodes_[index]; }
  serial_size_t in_data_size() const { return nodes_.front()->in_data_size(); }
  serial_size_t out_data_size() const { return nodes_.back()->out_data_size(); }

  // downcast the layer at `index` to its concrete type, or throw on mismatch
  template <typename T>
  const T& at(size_t index) const {
    const T* v = dynamic_cast<const T*>(nodes_[index]);
    if (v) return *v;
    throw nn_error("failed to cast");
  }

  template <typename T>
  T& at(size_t index) {
    T* v = dynamic_cast<T*>(nodes_[index]);
    if (v) return *v;
    throw nn_error("failed to cast");
  }
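
  // Example (hypothetical layer type; any concrete layer works): recover the
  // full interface of the layer stored at index 0, assuming it really is one:
  //
  //   auto& fc = net.at<fully_connected_layer<tan_h>>(0);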

  // @todo: multiple output
  virtual float_t target_value_min(int out_channel = 0) const {
    CNN_UNREFERENCED_PARAMETER(out_channel);
    return nodes_.back()->out_value_range().first;
  }

  virtual float_t target_value_max(int out_channel = 0) const {
    CNN_UNREFERENCED_PARAMETER(out_channel);
    return nodes_.back()->out_value_range().second;
  }

  void save(std::ostream& os) const {  // NOLINT
    for (auto& l : nodes_) {
      l->save(os);
    }
  }

  void load(std::istream& is) {  // NOLINT
    setup(false);
    for (auto& l : nodes_) {
      l->load(is);
    }
  }

  virtual void load(const std::vector<float_t>& vec) {
    int idx = 0;
    setup(false);
    for (auto& l : nodes_) {
      l->load(vec, idx);
    }
  }

  // convert an array of class labels into one-hot target vectors, using the
  // output layer's value range for the "off" and "on" values
  void label2vec(const label_t* t, serial_size_t num, std::vector<vec_t> *vec) const {
    serial_size_t outdim = out_data_size();

    vec->reserve(num);
    for (serial_size_t i = 0; i < num; i++) {
      assert(t[i] < outdim);
      vec->emplace_back(outdim, target_value_min());
      vec->back()[t[i]] = target_value_max();
    }
  }
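
  // Example: with out_data_size() == 3, and an output activation whose value
  // range is reported as [-0.8, 0.8], label 2 becomes { -0.8, -0.8, 0.8 }.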

  template <typename OutputArchive>
  void save_model(OutputArchive & oa) const;

  template <typename InputArchive>
  void load_model(InputArchive & ia);

  template <typename OutputArchive>
  void save_weights(OutputArchive & oa) const {
    for (auto n : nodes_) {
      oa(*n);
    }
  }

  template <typename InputArchive>
  void load_weights(InputArchive & ia) {
    for (auto n : nodes_) {
      ia(*n);
    }
  }

 protected:
  template <typename T>
  void push_back(T&& node) {
    push_back_impl(std::forward<T>(node),
                   typename std::is_rvalue_reference<decltype(node)>::type());  // NOLINT
  }

  template <typename T>
  void push_back(std::shared_ptr<T> node) {
    own_nodes_.push_back(node);
    nodes_.push_back(own_nodes_.back().get());
  }

  // transform indexing so that it's more suitable for per-layer operations
  // input:  [sample][channel][feature]
  // output: [channel][sample][feature]
  std::vector<tensor_t> reorder_for_layerwise_processing(const std::vector<tensor_t>& input) {
    const serial_size_t sample_count  = static_cast<serial_size_t>(input.size());
    const serial_size_t channel_count = static_cast<serial_size_t>(input[0].size());

    // @todo we could perhaps pass pointers to underlying vec_t objects, in order to avoid copying
    std::vector<tensor_t> output(channel_count, tensor_t(sample_count));

    for (serial_size_t sample = 0; sample < sample_count; ++sample) {
      assert(input[sample].size() == channel_count);
      for (serial_size_t channel = 0; channel < channel_count; ++channel) {
        output[channel][sample] = input[sample][channel];
      }
    }

    return output;
  }
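
  // Example: with 2 samples and 3 channels, input[1][2] (second sample, third
  // channel) ends up at output[2][1]; each per-feature vec_t is copied as-is.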

  template <typename T>
  void push_back_impl(T&& node, std::true_type) {  // is_rvalue_reference
    // rvalue: take ownership by moving the node into a shared_ptr
    own_nodes_.push_back(std::make_shared<
        typename std::remove_reference<T>::type>(std::forward<T>(node)));
    nodes_.push_back(own_nodes_.back().get());
  }

  template <typename T>
  void push_back_impl(T&& node, std::false_type) {
    // lvalue: the caller keeps ownership; store a non-owning pointer
    nodes_.push_back(&node);
  }

  /* nodes owned by this class */
  std::vector<std::shared_ptr<layer>> own_nodes_;
  /* list of all nodes, including those in own_nodes_ */
  std::vector<layerptr_t> nodes_;
};

/** single-input, single-output feedforward network
 **/
class sequential : public nodes {
 public:
  void backward(const std::vector<tensor_t>& first) override {
    const std::vector<tensor_t> reordered_grad = reorder_for_layerwise_processing(first);
    assert(reordered_grad.size() == 1);

    nodes_.back()->set_out_grads({ reordered_grad[0] });

    for (auto l = nodes_.rbegin(); l != nodes_.rend(); l++) {
      (*l)->backward();
    }
  }

  std::vector<tensor_t> forward(const std::vector<tensor_t>& first) override {
    const std::vector<tensor_t> reordered_data = reorder_for_layerwise_processing(first);
    assert(reordered_data.size() == 1);

    nodes_.front()->set_in_data({ reordered_data[0] });

    for (auto l : nodes_) {
      l->forward();
    }

    const std::vector<tensor_t> out = nodes_.back()->output();

    return normalize_out(out);
  }

  template <typename T>
  void add(T&& layer) {
    push_back(std::forward<T>(layer));

    if (nodes_.size() != 1) {
      auto head = nodes_[nodes_.size()-2];
      auto tail = nodes_[nodes_.size()-1];
      connect(head, tail, 0, 0);
      auto out = head->outputs();
      auto in = tail->inputs();
    }
    check_connectivity();
  }

  // verify that each layer's output edge is shared with the next layer's input edge
  void check_connectivity() {
    for (serial_size_t i = 0; i < nodes_.size() - 1; i++) {
      auto out = nodes_[i]->outputs();
      auto in = nodes_[i+1]->inputs();

      if (out[0] != in[0]) {
        throw nn_error("sequential connectivity check failed: adjacent layers are not connected");
      }
    }
  }

  template <typename InputArchive>
  void load_connections(InputArchive& ia) {
    for (serial_size_t i = 0; i < nodes_.size() - 1; i++) {
      auto head = nodes_[i];
      auto tail = nodes_[i + 1];
      connect(head, tail, 0, 0);
    }
  }

  template <typename OutputArchive>
  void save_connections(OutputArchive& ) const { }

 private:
  friend class nodes;

  // normalize indexing back to [sample][layer][feature]
  std::vector<tensor_t> normalize_out(const std::vector<tensor_t>& out) {
    std::vector<tensor_t> normalized_output;

    const size_t sample_count = out[0].size();
    normalized_output.resize(sample_count, tensor_t(1));

    for (size_t sample = 0; sample < sample_count; ++sample) {
      normalized_output[sample][0] = out[0][sample];
    }

    return normalized_output;
  }
};
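
// Usage sketch for sequential (a sketch with hypothetical layer arguments; in
// practice this class is usually driven through the network<sequential> wrapper):
//
//   tiny_dnn::sequential net;
//   net.add(fully_connected_layer<tan_h>(2, 3));   // 2 inputs -> 3 units
//   net.add(fully_connected_layer<tan_h>(3, 1));   // 3 units  -> 1 output
//   net.setup(true);                               // initialize weights
//   auto out = net.forward({ { vec_t{1, 0} } });   // [sample][channel][feature]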

/** generic graph network
 **/
class graph : public nodes {
 public:
  void backward(const std::vector<tensor_t>& out_grad) override {
    serial_size_t output_channel_count = static_cast<serial_size_t>(out_grad[0].size());

    if (output_channel_count != output_layers_.size()) {
      throw nn_error("input size mismatch");
    }

    const std::vector<tensor_t> reordered_grad = reorder_for_layerwise_processing(out_grad);
    assert(reordered_grad.size() == output_channel_count);

    for (serial_size_t i = 0; i < output_channel_count; i++) {
      output_layers_[i]->set_out_grads({ reordered_grad[i] });
    }

    for (auto l = nodes_.rbegin(); l != nodes_.rend(); l++) {
      (*l)->backward();
    }
  }

  std::vector<tensor_t> forward(const std::vector<tensor_t>& in_data) override {
    serial_size_t input_data_channel_count = static_cast<serial_size_t>(in_data[0].size());

    if (input_data_channel_count != input_layers_.size()) {
      throw nn_error("input size mismatch");
    }

    const std::vector<tensor_t> reordered_data = reorder_for_layerwise_processing(in_data);
    assert(reordered_data.size() == input_data_channel_count);

    for (serial_size_t channel_index = 0; channel_index < input_data_channel_count; channel_index++) {
      input_layers_[channel_index]->set_in_data({ reordered_data[channel_index] });
    }

    for (auto l : nodes_) {
      l->forward();
    }
    return merge_outs();
  }

  void construct(const std::vector<layerptr_t>& input,
                 const std::vector<layerptr_t>& output) {
    std::vector<layerptr_t> sorted;
    std::vector<nodeptr_t> input_nodes(input.begin(), input.end());
    std::unordered_map<node*, std::vector<uint8_t>> removed_edge;

    // topological sort (Kahn's algorithm): repeatedly emit a node whose
    // incoming edges have all been removed, then remove its outgoing edges
    while (!input_nodes.empty()) {
      sorted.push_back(dynamic_cast<layerptr_t>(input_nodes.back()));
      input_nodes.pop_back();

      layerptr_t curr = sorted.back();
      std::vector<node*> next = curr->next_nodes();

      for (size_t i = 0; i < next.size(); i++) {
        if (!next[i]) continue;
        // remove edge between next[i] and current
        if (removed_edge.find(next[i]) == removed_edge.end()) {
          removed_edge[next[i]] =
              std::vector<uint8_t>(next[i]->prev_nodes().size(), 0);
        }

        std::vector<uint8_t>& removed = removed_edge[next[i]];
        removed[find_index(next[i]->prev_nodes(), curr)] = 1;

        // once all incoming edges of next[i] are removed, it becomes ready
        if (std::all_of(removed.begin(), removed.end(),
                        [](uint8_t x) { return x == 1; })) {
          input_nodes.push_back(next[i]);
        }
      }
    }

    for (auto& n : sorted) {
      nodes_.push_back(n);
    }

    input_layers_ = input;
    output_layers_ = output;

    setup(false);
  }

 private:
  friend class nodes;

  struct _graph_connection {
    void add_connection(serial_size_t head, serial_size_t tail,
                        serial_size_t head_index, serial_size_t tail_index) {
      if (!is_connected(head, tail, head_index, tail_index)) {
        connections.emplace_back(head, tail, head_index, tail_index);
      }
    }

    bool is_connected(serial_size_t head, serial_size_t tail,
                      serial_size_t head_index, serial_size_t tail_index) const {
      return std::find(connections.begin(),
                       connections.end(),
                       std::make_tuple(head, tail, head_index, tail_index)) != connections.end();
    }

    template <typename Archive>
    void serialize(Archive & ar) {
      ar(CEREAL_NVP(connections), CEREAL_NVP(in_nodes), CEREAL_NVP(out_nodes));
    }

    std::vector<std::tuple<serial_size_t, serial_size_t, serial_size_t, serial_size_t>> connections;
    std::vector<serial_size_t> in_nodes, out_nodes;
  };

  template <typename OutputArchive>
  void save_connections(OutputArchive& oa) const {
    _graph_connection gc;
    std::unordered_map<node*, serial_size_t> node2id;
    serial_size_t idx = 0;

    for (auto n : nodes_) {
      node2id[n] = idx++;
    }
    for (auto l : input_layers_) {
      gc.in_nodes.push_back(node2id[l]);
    }
    for (auto l : output_layers_) {
      gc.out_nodes.push_back(node2id[l]);
    }

    // walk the graph once and record every (head, tail, head-port, tail-port) edge
    for (auto l : input_layers_) {
      graph_traverse(l, [=](layer& l) {}, [&](edge& e) {
        auto next = e.next();
        serial_size_t head_index = e.prev()->next_port(e);

        for (auto n : next) {
          serial_size_t tail_index = n->prev_port(e);
          gc.add_connection(node2id[e.prev()], node2id[n], head_index, tail_index);
        }
      });
    }

    oa(cereal::make_nvp("graph", gc));
  }

  template <typename InputArchive>
  void load_connections(InputArchive& ia) {
    _graph_connection gc;
    ia(cereal::make_nvp("graph", gc));

    for (auto c : gc.connections) {
      serial_size_t head, tail, head_index, tail_index;
      std::tie(head, tail, head_index, tail_index) = c;
      connect(nodes_[head], nodes_[tail], head_index, tail_index);
    }
    for (auto in : gc.in_nodes) {
      input_layers_.push_back(nodes_[in]);
    }
    for (auto out : gc.out_nodes) {
      output_layers_.push_back(nodes_[out]);
    }
  }

  // normalize indexing back to [sample][layer][feature]
  std::vector<tensor_t> merge_outs() {
    std::vector<tensor_t> merged;
    serial_size_t output_channel_count = static_cast<serial_size_t>(output_layers_.size());
    for (serial_size_t output_channel = 0; output_channel < output_channel_count; ++output_channel) {
      std::vector<tensor_t> out = output_layers_[output_channel]->output();

      serial_size_t sample_count = static_cast<serial_size_t>(out[0].size());
      if (output_channel == 0) {
        assert(merged.empty());
        merged.resize(sample_count, tensor_t(output_channel_count));
      }

      assert(merged.size() == sample_count);

      for (serial_size_t sample = 0; sample < sample_count; ++sample) {
        merged[sample][output_channel] = out[0][sample];
      }
    }
    return merged;
  }
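
  // Example: with two output layers and three samples,
  // merged[sample][channel] == output_layers_[channel]->output()[0][sample],
  // i.e. the result is indexed as [sample][channel][feature].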

  // position of `target` within `nodes`, used to mark removed incoming edges
  serial_size_t find_index(const std::vector<node*>& nodes,
                           layerptr_t target) {
    for (serial_size_t i = 0; i < nodes.size(); i++) {
      if (nodes[i] == static_cast<node*>(&*target)) return i;
    }
    throw nn_error("invalid connection");
  }

  std::vector<layerptr_t> input_layers_;
  std::vector<layerptr_t> output_layers_;
};
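
// Usage sketch for graph (a sketch with hypothetical layers `in_a`, `in_b`, `fc`
// already wired together elsewhere, e.g. via operator<< on layers):
//
//   tiny_dnn::graph g;
//   g.construct({ &in_a, &in_b }, { &fc });  // topologically sorts the DAG
//   auto out = g.forward({ { a0, b0 } });    // one sample, two input channels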



template <typename OutputArchive>
void nodes::save_model(OutputArchive & oa) const {
  oa(cereal::make_nvp("nodes", nodes_));

  // the connection layout is saved by the concrete subtype
  if (typeid(*this) == typeid(sequential)) {
    dynamic_cast<const sequential*>(this)->save_connections(oa);
  } else {
    dynamic_cast<const graph*>(this)->save_connections(oa);
  }
}

template <typename InputArchive>
void nodes::load_model(InputArchive & ia) {
  own_nodes_.clear();
  nodes_.clear();

  ia(cereal::make_nvp("nodes", own_nodes_));

  for (auto& n : own_nodes_) {
    nodes_.push_back(&*n);
  }

  // restore the connection layout for the concrete subtype
  if (typeid(*this) == typeid(sequential)) {
    dynamic_cast<sequential*>(this)->load_connections(ia);
  } else {
    dynamic_cast<graph*>(this)->load_connections(ia);
  }
}
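
// Usage sketch (a sketch, assuming a sequential net built with add() and a
// cereal JSON archive whose header the caller includes):
//
//   std::ofstream ofs("model.json");
//   cereal::JSONOutputArchive oa(ofs);
//   net.save_model(oa);  // writes the layer list plus the connection layout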


}  // namespace tiny_dnn