tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
layer.h
/*
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <sstream>
#include <iomanip>
#include <memory>
#include <numeric>
#include <algorithm>
#include <vector>
#include <string>
#include <utility>
#include <queue>
#include <unordered_set>  // used by graph_traverse() below

#include "tiny_dnn/node.h"
#include "tiny_dnn/core/backend.h"
#include "tiny_dnn/core/framework/device.fwd.h"

#include "tiny_dnn/util/util.h"
#include "tiny_dnn/util/product.h"
#include "tiny_dnn/util/image.h"
#include "tiny_dnn/util/weight_init.h"

#include "tiny_dnn/optimizers/optimizer.h"
#include "tiny_dnn/activations/activation_function.h"

namespace tiny_dnn {

/**
 * base class of all kinds of NN layers
 *
 * Sub-classes must override these methods:
 * - forward_propagation ... body of the forward-pass calculation
 * - back_propagation    ... body of the backward-pass calculation
 * - in_shape            ... specify input data shapes
 * - out_shape           ... specify output data shapes
 * - layer_type          ... name of the layer
 **/
class layer : public node {
 public:
    friend void connection_mismatch(const layer& from,
                                    const layer& to);

    virtual ~layer() = default;

    /**
     * @brief Default layer constructor that instantiates an N-input,
     * M-output layer.
     *
     * @param in_type[N]  type of each input vector (data, weight, bias...)
     * @param out_type[M] type of each output vector
     **/
    layer(const std::vector<vector_type>& in_type,
          const std::vector<vector_type>& out_type)
        : node(static_cast<serial_size_t>(in_type.size()),
               static_cast<serial_size_t>(out_type.size())),
          initialized_(false),
          parallelize_(true),
          in_channels_(static_cast<serial_size_t>(in_type.size())),
          out_channels_(static_cast<serial_size_t>(out_type.size())),
          in_type_(in_type),
          out_type_(out_type) {
        weight_init_ = std::make_shared<weight_init::xavier>();
        bias_init_   = std::make_shared<weight_init::constant>();
        trainable_   = true;
    }

    layer(const layer&) = default;
    layer& operator =(const layer&) = default;

#ifdef CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
    layer(layer&&) = default;
    layer& operator =(layer&&) = default;
#endif

    void set_parallelize(bool parallelize) {
        parallelize_ = parallelize;
    }

    void set_backend(std::shared_ptr<core::backend> backend) {
        backend_ = backend;
    }

    void set_backend_type(core::backend_t backend_type) {
        backend_type_ = backend_type;
    }

    /////////////////////////////////////////////////////////////////////////
    // getter

    bool parallelize() const { return parallelize_; }

    // TODO(edgar): Deprecated: use engine() below instead
    core::backend_t backend_type() const {
        return backend_->type();
    }

    core::backend_t engine() const {
        return backend_type_;
    }

    virtual std::string kernel_file() const {
        return std::string("empty_kernel_str");
    }

    virtual std::string kernel_header() const {
        return std::string();
    }

    virtual void createOp() {
    }

    void setDevice(const Device& device) {
        device_ptr_ = const_cast<Device*>(&device);
    }

    Device* device() const {
        return device_ptr_;
    }

    std::shared_ptr<core::backend> backend() { return backend_; }

    /// number of incoming edges in this layer
    serial_size_t in_channels() const { return in_channels_; }

    /// number of outgoing edges in this layer
    serial_size_t out_channels() const { return out_channels_; }
    serial_size_t in_data_size() const {
        return sumif(in_shape(), [&](serial_size_t i) {  // NOLINT
            return in_type_[i] == vector_type::data; }, [](const shape3d& s) {
            return s.size(); });
    }

    serial_size_t out_data_size() const {
        return sumif(out_shape(), [&](serial_size_t i) {  // NOLINT
            return out_type_[i] == vector_type::data; }, [](const shape3d& s) {
            return s.size(); });
    }

    std::vector<shape3d> in_data_shape() {
        return filter(in_shape(), [&](size_t i) {  // NOLINT
            return in_type_[i] == vector_type::data;
        });
    }

    std::vector<shape3d> out_data_shape() {
        return filter(out_shape(), [&](size_t i) {  // NOLINT
            return out_type_[i] == vector_type::data;
        });
    }

    ///! @deprecated use in_data_size() instead
    serial_size_t in_size() const {
        return in_data_size();
    }

    ///! @deprecated use out_data_size() instead
    serial_size_t out_size() const {
        return out_data_size();
    }

    std::vector<const vec_t*> weights() const {
        std::vector<const vec_t*> v;
        for (serial_size_t i = 0; i < in_channels_; i++) {
            if (is_trainable_weight(in_type_[i])) {
                v.push_back(get_weight_data(i));
            }
        }
        return v;
    }

    std::vector<vec_t*> weights() {
        std::vector<vec_t*> v;
        for (serial_size_t i = 0; i < in_channels_; i++) {
            if (is_trainable_weight(in_type_[i])) {
                v.push_back(get_weight_data(i));
            }
        }
        return v;
    }

    std::vector<tensor_t*> weights_grads() {
        std::vector<tensor_t*> v;
        for (serial_size_t i = 0; i < in_channels_; i++) {
            if (is_trainable_weight(in_type_[i])) {
                v.push_back(ith_in_node(i)->get_gradient());
            }
        }
        return v;
    }

    std::vector<edgeptr_t> inputs() {
        std::vector<edgeptr_t> nodes;
        for (serial_size_t i = 0; i < in_channels_; i++) {
            nodes.push_back(ith_in_node(i));
        }
        return nodes;
    }

    std::vector<edgeptr_t> outputs() {
        std::vector<edgeptr_t> nodes;
        for (serial_size_t i = 0; i < out_channels_; i++) {
            nodes.push_back(ith_out_node(i));
        }
        return nodes;
    }

    std::vector<edgeptr_t> outputs() const {
        std::vector<edgeptr_t> nodes;
        for (serial_size_t i = 0; i < out_channels_; i++) {
            nodes.push_back(const_cast<layerptr_t>(this)
                                ->ith_out_node(i));
        }
        return nodes;
    }

    void set_out_grads(const std::vector<tensor_t>& grad) {
        serial_size_t j = 0;
        for (serial_size_t i = 0; i < out_channels_; i++) {
            if (out_type_[i] != vector_type::data) continue;
            assert(j < grad.size());
            *ith_out_node(i)->get_gradient() = grad[j++];
        }
    }

    void set_in_data(const std::vector<tensor_t>& data) {
        serial_size_t j = 0;
        for (serial_size_t i = 0; i < in_channels_; i++) {
            if (in_type_[i] != vector_type::data) continue;
            assert(j < data.size());
            *ith_in_node(i)->get_data() = data[j++];
        }
    }

    std::vector<tensor_t> output() const {
        std::vector<tensor_t> out;
        for (serial_size_t i = 0; i < out_channels_; i++) {
            if (out_type_[i] == vector_type::data) {
                out.push_back(*(const_cast<layerptr_t>(this))
                                   ->ith_out_node(i)->get_data());
            }
        }
        return out;
    }

    std::vector<vector_type> in_types() const { return in_type_; }

    std::vector<vector_type> out_types() const { return out_type_; }

    void set_trainable(bool trainable) { trainable_ = trainable; }

    bool trainable() const { return trainable_; }

    /**
     * return output value range;
     * used only for calculating the target value from a label-id in the
     * final (output) layer; override properly if the layer is intended to
     * be used as an output layer
     **/
    virtual std::pair<float_t, float_t> out_value_range() const {
        return { float_t(0.0), float_t(1.0) };
    }

    /**
     * array of input shapes (width x height x depth)
     **/
    virtual std::vector<shape3d> in_shape() const = 0;

    /**
     * array of output shapes (width x height x depth)
     **/
    virtual std::vector<shape3d> out_shape() const = 0;

    /**
     * name of layer, should be unique for each concrete class
     **/
    virtual std::string layer_type() const = 0;

    /**
     * number of incoming connections for each output unit;
     * used only for weight/bias initialization methods which require
     * fan-in size (e.g. xavier); override if the layer has trainable
     * weights and the scale of initialization matters
     **/
    virtual serial_size_t fan_in_size() const {
        return in_shape()[0].width_;
    }

    /**
     * number of outgoing connections for each input unit;
     * used only for weight/bias initialization methods which require
     * fan-out size (e.g. xavier)
     **/
    virtual serial_size_t fan_out_size() const {
        return out_shape()[0].width_;
    }

    /////////////////////////////////////////////////////////////////////////
    // setter
    template <typename WeightInit>
    layer& weight_init(const WeightInit& f) {
        weight_init_ = std::make_shared<WeightInit>(f);
        return *this;
    }

    template <typename BiasInit>
    layer& bias_init(const BiasInit& f) {
        bias_init_ = std::make_shared<BiasInit>(f);
        return *this;
    }

    template <typename WeightInit>
    layer& weight_init(std::shared_ptr<WeightInit> f) {
        weight_init_ = f;
        return *this;
    }

    template <typename BiasInit>
    layer& bias_init(std::shared_ptr<BiasInit> f) {
        bias_init_ = f;
        return *this;
    }
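
    // A minimal usage sketch for the initializer setters above; `my_layer`
    // stands for a hypothetical concrete layer type. weight_init::xavier and
    // weight_init::constant are the initializers this header already uses as
    // defaults (the value passed to constant is an assumption about its
    // constructor). Both setters return *this, so the calls can be chained:
    //
    //   my_layer l;
    //   l.weight_init(weight_init::xavier())
    //    .bias_init(weight_init::constant(0.5));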

    /////////////////////////////////////////////////////////////////////////
    // save/load
    template <typename Archive>
    void serialize(Archive & ar) {
        auto all_weights = weights();
        for (auto weight : all_weights) {
            ar(*weight);
        }
        initialized_ = true;
    }

    virtual void save(std::ostream& os) const {  // NOLINT
        /*if (is_exploded()) {
            throw nn_error("failed to save weights because of infinite weight");
        }*/
        auto all_weights = weights();
        for (auto& weight : all_weights) {
            for (auto w : *weight) os << w << " ";
        }
    }
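
    // A short sketch of a weights round-trip through the whitespace-separated
    // text format used by save() above and load() below; `l` stands for any
    // constructed layer instance:
    //
    //   std::stringstream ss;
    //   l.save(ss);   // serialize the raw weights
    //   l.load(ss);   // read them back; marks the layer as initialized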

    virtual void load(std::istream& is) {  // NOLINT
        auto all_weights = weights();
        for (auto& weight : all_weights) {
            for (auto& w : *weight) is >> w;
        }
        initialized_ = true;
    }

    virtual void load(const std::vector<float_t>& src, int& idx) {  // NOLINT
        auto all_weights = weights();
        for (auto& weight : all_weights) {
            for (auto& w : *weight) w = src[idx++];
        }
        initialized_ = true;
    }

    /////////////////////////////////////////////////////////////////////////
    // visualize

    /// visualize the latest output of this layer;
    /// the default implementation interprets the output as a 1d-vector,
    /// so "visual" layers (e.g. convolutional layers) should override
    /// this for better visualization.
    virtual image<> output_to_image(size_t channel = 0) const {
        const vec_t* output = &(*(outputs()[channel]->get_data()))[0];
        return vec2image<unsigned char>(*output, out_shape()[channel]);
    }

    /////////////////////////////////////////////////////////////////////////
    // fprop/bprop

    /**
     * @param in_data  input vectors of this layer (data, weight, bias)
     * @param out_data output vectors
     **/
    virtual void forward_propagation(const std::vector<tensor_t*>& in_data,
                                     std::vector<tensor_t*>& out_data) = 0;

    /**
     * return delta of previous layer (delta = \frac{dE}{da}, a = wx in a
     * fully-connected layer)
     * @param in_data  input vectors (same vectors as forward_propagation)
     * @param out_data output vectors (same vectors as forward_propagation)
     * @param out_grad gradients of output vectors (the i-th vector
     *                 corresponds with out_data[i])
     * @param in_grad  gradients of input vectors (the i-th vector
     *                 corresponds with in_data[i])
     **/
    virtual void back_propagation(const std::vector<tensor_t*>& in_data,
                                  const std::vector<tensor_t*>& out_data,
                                  std::vector<tensor_t*>& out_grad,
                                  std::vector<tensor_t*>& in_grad) = 0;

    /**
     * return delta2 of previous layer (delta2 = \frac{d^2E}{da^2}, the
     * diagonal of the hessian matrix); it is never called if the optimizer
     * is hessian-free
     **/
    // virtual void back_propagation_2nd(const std::vector<vec_t>& delta_in) = 0;

    // called after updating weights
    virtual void post_update() {}

    /**
     * notify changing context (train <=> test)
     **/
    virtual void set_context(net_phase ctx) {
        CNN_UNREFERENCED_PARAMETER(ctx);
    }

    /* @brief Performs the layer forward operation given an input tensor and
     * returns the computed data in tensor form.
     *
     * @param input Vector of `tensor_t` with the incoming data.
     *
     * Internally, it first allocates data without resetting the weights,
     * then forwards the input data to the computational graph; inside the
     * forward() method the data from the computational graph is embedded
     * into containers and finally forwarded to the computational operation
     * kernels.
     *
     * TODO: There's probably an overhead of moving from/to the computational
     * graph. Will this overhead be reduced once we have the Tensor
     * class integrated?
     */
    std::vector<tensor_t> forward(const std::vector<tensor_t>& input) {  // for test
        // allocate data in the computational graph without
        // resetting the weights.
        setup(false);
        // the incoming data is forwarded to the computational graph.
        set_in_data(input);
        // pick up the data from the computational graph and perform
        // the computation.
        forward();
        // retrieve the computed data and return the values as a 4D tensor.
        return output();
    }

    std::vector<tensor_t> backward(const std::vector<tensor_t>& out_grads) {  // for test
        setup(false);
        set_out_grads(out_grads);
        backward();
        return map_<tensor_t>(inputs(), [](edgeptr_t e) {
            return *e->get_gradient();
        });
    }
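
    // A minimal sketch of the two test helpers above, assuming a
    // hypothetical concrete layer `my_layer` with one data input and one
    // data output. forward() takes a batch (a vector of tensor_t) and
    // returns the activations; backward() takes gradients w.r.t. the
    // outputs and returns gradients w.r.t. the inputs:
    //
    //   my_layer l;
    //   std::vector<tensor_t> in       = { tensor_t{ vec_t{1.0, 2.0, 3.0} } };
    //   std::vector<tensor_t> out      = l.forward(in);
    //   std::vector<tensor_t> in_grads = l.backward(out);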

    /* @brief The purpose of this method is to forward the data from the
     * computational graph to the layer interface.
     *
     * This is one of the two core (forward/backward) methods that
     * retrieve the data allocated on the heap by the computational graph
     * and construct the containers to handle the computation by batches.
     * Additionally, the sample count, i.e. the number of samples in the
     * batch, is set.
     *
     * Note: in_data and out_data attempt to contain tensors. However, they
     * are not real tensors since tensor_t has three dimensions instead of
     * four. For this reason they are embedded into std::vector. Also note
     * that when a std::vector<tensor_t*> is constructed we cannot assure
     * that the data is contiguous.
     *
     * After the Tensor class integration we should be able to avoid having
     * in_data and out_data in vectors, since the Tensor class itself can
     * handle batch storage in one single vector with contiguous data.
     */
    void forward() {
        // the computational graph
        std::vector<tensor_t*> in_data, out_data;

        // Organize the input/output vectors from storage (computational
        // graph). Internally ith_in_node() will create a connection/edge
        // in the computational graph and will allocate memory in case
        // that's not done yet.
        for (serial_size_t i = 0; i < in_channels_; i++) {
            in_data.push_back(ith_in_node(i)->get_data());
        }

        // resize the output storage to have room for every input sample
        // in the batch
        set_sample_count(static_cast<serial_size_t>(in_data[0]->size()));

        // Internally ith_out_node() will create a connection/edge to the
        // computational graph and will allocate memory in case that's not
        // done yet. In addition, gradient vectors are initialized to
        // default values.
        for (serial_size_t i = 0; i < out_channels_; i++) {
            out_data.push_back(ith_out_node(i)->get_data());
            ith_out_node(i)->clear_grads();
        }

        // call the forward computation kernel/routine
        forward_propagation(in_data, out_data);
    }

    void backward() {
        std::vector<tensor_t*> in_data, out_data, in_grad, out_grad;

        // organize the input/output vectors from storage
        for (serial_size_t i = 0; i < in_channels_; i++) {
            in_data.push_back(ith_in_node(i)->get_data());
        }
        for (serial_size_t i = 0; i < out_channels_; i++) {
            out_data.push_back(ith_out_node(i)->get_data());
        }
        for (serial_size_t i = 0; i < in_channels_; i++) {
            in_grad.push_back(ith_in_node(i)->get_gradient());
        }
        for (serial_size_t i = 0; i < out_channels_; i++) {
            out_grad.push_back(ith_out_node(i)->get_gradient());
        }
        back_propagation(in_data, out_data, out_grad, in_grad);
    }

    /* @brief Allocates data in the computational graph and resets the
     * weights if needed, or if the data is not already initialized.
     *
     * @param reset_weight Boolean value to force a reset of the weights.
     * Weights are reset automatically if they are not initialized.
     */
    void setup(bool reset_weight) {
        // The number of input shapes (width x height x depth) must be equal
        // to the number of input channels, a.k.a. the number of incoming
        // vectors or 'edges' in the computational nomenclature. The same
        // applies to the output shapes and the number of output edges.
        if (in_shape().size() != in_channels_ ||
            out_shape().size() != out_channels_) {
            throw nn_error("Connection mismatch at setup layer");
        }

        // An 'edge' is created in the computational graph from the current
        // layer/node to each output node, and the needed memory is
        // allocated. The number of output nodes is determined by the layer
        // interface. In order to handle graph-based networks, in which a
        // layer/node might have multiple input/output connections, we need
        // to check that the connection edge does not already exist, to
        // avoid duplicated memory allocation.
        for (size_t i = 0; i < out_channels_; i++) {
            if (!next_[i]) {
                // the connection edge doesn't exist, so we proceed to
                // allocate the necessary memory.
                next_[i] = std::make_shared<edge>(
                    this, out_shape()[i], out_type_[i]);
            }
        }

        // reset the weights if necessary, or in case the data is
        // still not initialized.
        if (reset_weight || !initialized_) {
            init_weight();
        }
    }

    /* @brief Initializes the vectors containing the trainable data.
     *
     * If the layer/node is set as non-trainable, it does nothing and
     * returns. Otherwise, for each input connection, and depending on
     * the nature of the data (weight or bias), the pertinent
     * initialization function is called to fill the vectors with the
     * generated values.
     */
    void init_weight() {
        // the layer/node is not trainable: do nothing and mark the
        // layer/node as initialized.
        if (!trainable_) {
            initialized_ = true;
            return;
        }

        // Fill the vectors with values generated by the initialization
        // function. The pointer to the data is obtained from the
        // computational graph; the methods fan_in_size() and fan_out_size()
        // return the number of incoming/outgoing connections for each
        // input/output unit.
        for (serial_size_t i = 0; i < in_channels_; i++) {
            switch (in_type_[i]) {
                // fill vectors of weight type
                case vector_type::weight:
                    weight_init_->fill(get_weight_data(i),
                                       fan_in_size(), fan_out_size());
                    break;
                // fill vectors of bias type
                case vector_type::bias:
                    bias_init_->fill(get_weight_data(i),
                                     fan_in_size(), fan_out_size());
                    break;
                default:
                    break;
            }
        }
        // once the data initialization succeeds, we mark the
        // layer/node as initialized.
        initialized_ = true;
    }

    void clear_grads() {
        for (serial_size_t i = 0; i < static_cast<serial_size_t>(in_type_.size()); i++) {
            ith_in_node(i)->clear_grads();
        }
    }

    void update_weight(optimizer *o, serial_size_t batch_size) {
        float_t rcp_batch_size = float_t(1) / float_t(batch_size);
        vec_t diff;
        for (serial_size_t i = 0; i < static_cast<serial_size_t>(in_type_.size()); i++) {
            if (trainable() && is_trainable_weight(in_type_[i])) {
                vec_t& target = *get_weight_data(i);
                ith_in_node(i)->merge_grads(&diff);
                // average the accumulated gradient over the batch
                std::transform(diff.begin(), diff.end(),
                               diff.begin(), [&](float_t x) {  // NOLINT
                                   return x * rcp_batch_size; });
                // parallelize only when the target size is big enough to
                // mitigate thread-spawning overhead.
                bool parallelize = (target.size() >= 512);
                o->update(diff, target, parallelize);
            }
        }
        clear_grads();
        post_update();
    }
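
    // A short sketch of a single optimizer step, assuming gradients for a
    // mini-batch of 16 samples have already been accumulated by backward();
    // adagrad is one of the optimizers declared in
    // tiny_dnn/optimizers/optimizer.h (included above):
    //
    //   adagrad opt;
    //   l.update_weight(&opt, 16);  // average grads, apply update, clear grads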

    bool has_same_weights(const layer& rhs, float_t eps) const {
        auto w1 = weights();
        auto w2 = rhs.weights();
        if (w1.size() != w2.size()) return false;

        for (size_t i = 0; i < w1.size(); i++) {
            if (w1[i]->size() != w2[i]->size()) return false;

            for (size_t j = 0; j < w1[i]->size(); j++) {
                if (std::abs(w1[i]->at(j) - w2[i]->at(j)) > eps) return false;
            }
        }
        return true;
    }

    virtual void set_sample_count(serial_size_t sample_count) {
        // increase the size if necessary - but do not decrease
        auto resize = [sample_count](tensor_t* tensor) {
            tensor->resize(sample_count, (*tensor)[0]);
        };

        for (serial_size_t i = 0; i < in_channels_; i++) {
            if (!is_trainable_weight(in_type_[i])) {
                resize(ith_in_node(i)->get_data());
            }
            resize(ith_in_node(i)->get_gradient());
        }

        for (serial_size_t i = 0; i < out_channels_; i++) {
            if (!is_trainable_weight(out_type_[i])) {
                resize(ith_out_node(i)->get_data());
            }
            resize(ith_out_node(i)->get_gradient());
        }
    }

    /**
     * generate layer from cereal's Archive
     **/
    template <typename InputArchive>
    static std::shared_ptr<layer> load_layer(InputArchive & ia);

    template <typename OutputArchive>
    static void save_layer(OutputArchive & oa, const layer& l);

    template <class Archive>
    void serialize_prolog(Archive & ar);

 protected:
    /** Flag indicating whether the layer/node is initialized */
    bool initialized_;
    /** Flag indicating whether the layer/node operations are parallelized */
    bool parallelize_;
    /** The number of input vectors/edges */
    serial_size_t in_channels_;
    /** The number of output vectors/edges */
    serial_size_t out_channels_;
    /** Vector containing the type of data for inputs */
    std::vector<vector_type> in_type_;
    /** Vector containing the type of data for outputs */
    std::vector<vector_type> out_type_;
    /** The current backend type for operations */
    core::backend_t backend_type_;
    /** The backend instance (deprecated) */
    std::shared_ptr<core::backend> backend_;
    /** Pointer to the device on which the layer/node will run */
    Device* device_ptr_ = nullptr;

 private:
    /** Flag indicating whether the layer/node parameters are trainable */
    bool trainable_;
    /** Pointer to the function used for weights initialization */
    std::shared_ptr<weight_init::function> weight_init_;
    /** Pointer to the function used for biases initialization */
    std::shared_ptr<weight_init::function> bias_init_;

    /* @brief Allocates the necessary edge memory for a specific
     * incoming connection.
     *
     * @param i The position at which to store the previous edge.
     *
     * Graphical explanation:
     *
     *     nullptr -- |edge| -- prev(i) ---- |layer|
     *                nullptr -- prev(i+1) -'
     */
    void alloc_input(serial_size_t i) const {
        // the created incoming edge won't have a previous connection;
        // for this reason the first parameter is nullptr.
        prev_[i] = std::make_shared<edge>(nullptr, in_shape()[i], in_type_[i]);
    }

    /* @brief Allocates the necessary edge memory for a specific
     * outgoing connection.
     *
     * @param i The position at which to store the next edge.
     *
     * Graphical explanation:
     *
     *     |layer| -- next(i) ---- |edge|
     *          `- next(i+1) -- nullptr
     */
    void alloc_output(serial_size_t i) const {
        // the created outgoing edge will have the current layer as its
        // previous node.
        next_[i] = std::make_shared<edge>((layer*)this,
                                          out_shape()[i], out_type_[i]);
    }

    /* @brief Creates an edge between the current node and one incoming,
     * or previous, node.
     *
     * @param i The position at which to store the previous edge.
     *
     * The method checks whether the edge already exists; otherwise it is
     * created and the necessary memory allocated. The method returns a
     * pointer to the previous edge.
     */
    edgeptr_t ith_in_node(serial_size_t i) {
        // in case the edge doesn't exist, we create it
        if (!prev_[i]) alloc_input(i);
        return prev()[i];
    }

    /* @brief Creates an edge between the current node and one outgoing,
     * or next, node.
     *
     * @param i The position at which to store the next edge.
     *
     * The method checks whether the edge already exists; otherwise it is
     * created and the necessary memory allocated. The method returns a
     * pointer to the next edge.
     */
    edgeptr_t ith_out_node(serial_size_t i) {
        // in case the edge doesn't exist, we create it
        if (!next_[i]) alloc_output(i);
        return next()[i];
    }

    /* @brief Retrieves the weight vector from an incoming edge.
     * @param i The position of the incoming edge.
     *
     * Returns a mutable pointer to the edge's raw data.
     */
    vec_t* get_weight_data(serial_size_t i) {
        assert(is_trainable_weight(in_type_[i]));
        return &(*(ith_in_node(i)->get_data()))[0];
    }

    /* @brief Retrieves the weight vector from an incoming edge.
     * @param i The position of the incoming edge.
     *
     * Returns a non-mutable pointer to the edge's raw data.
     */
    const vec_t* get_weight_data(serial_size_t i) const {
        assert(is_trainable_weight(in_type_[i]));
        return &(*(const_cast<layerptr_t>(this)->ith_in_node(i)->get_data()))[0];
    }
};

inline void connect(layerptr_t head,
                    layerptr_t tail,
                    serial_size_t head_index = 0,
                    serial_size_t tail_index = 0) {
    auto out_shape = head->out_shape()[head_index];
    auto in_shape  = tail->in_shape()[tail_index];

    head->setup(false);

    if (out_shape.size() != in_shape.size()) {
        connection_mismatch(*head, *tail);
    }

    if (!head->next_[head_index]) {
        throw nn_error("output edge must not be null");
    }

    tail->prev_[tail_index] = head->next_[head_index];
    tail->prev_[tail_index]->add_next_node(tail);
}

inline layer& operator << (layer& lhs, layer& rhs) {
    connect(&lhs, &rhs);
    return rhs;
}
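
// A minimal usage sketch for operator<< above, assuming three hypothetical
// concrete layer instances a, b and c whose shapes are compatible. The
// operator returns its right-hand side, so connections chain left to right:
//
//   a << b << c;   // same as connect(&a, &b); connect(&b, &c);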

template <typename Char, typename CharTraits>
std::basic_ostream<Char, CharTraits>& operator << (
    std::basic_ostream<Char, CharTraits>& os, const layer& v) {
    v.save(os);
    return os;
}

template <typename Char, typename CharTraits>
std::basic_istream<Char, CharTraits>& operator >> (
    std::basic_istream<Char, CharTraits>& is, layer& v) {
    v.load(is);
    return is;
}

// error message functions

inline void connection_mismatch(const layer& from, const layer& to) {
    std::ostringstream os;

    os << std::endl;
    os << "output size of Nth layer must be equal to input of (N+1)th layer\n";

    os << "layerN:   " << std::setw(12) << from.layer_type() << " in:"
       << from.in_data_size() << "("
       << from.in_shape() << "), " << "out:"
       << from.out_data_size() << "("
       << from.out_shape() << ")\n";

    os << "layerN+1: " << std::setw(12) << to.layer_type() << " in:"
       << to.in_data_size() << "("
       << to.in_shape() << "), " << "out:"
       << to.out_data_size() << "("
       << to.out_shape() << ")\n";

    os << from.out_data_size() << " != " << to.in_data_size() << std::endl;
    std::string detail_info = os.str();

    throw nn_error("layer dimension mismatch!" + detail_info);
}

inline void data_mismatch(const layer& layer, const vec_t& data) {
    std::ostringstream os;

    os << std::endl;
    os << "data dimension:    " << data.size() << "\n";
    os << "network dimension: " << layer.in_data_size() << "("
       << layer.layer_type() << ":"
       << layer.in_shape() << ")\n";

    std::string detail_info = os.str();

    throw nn_error("input dimension mismatch!" + detail_info);
}

inline void pooling_size_mismatch(serial_size_t in_width,
                                  serial_size_t in_height,
                                  serial_size_t pooling_size_x,
                                  serial_size_t pooling_size_y) {
    std::ostringstream os;

    os << std::endl;
    os << "WxH:" << in_width << "x" << in_height << std::endl;
    os << "pooling-size:" << pooling_size_x << "x" << pooling_size_y << std::endl;

    std::string detail_info = os.str();

    throw nn_error("width/height not multiple of pooling size" + detail_info);
}


template <typename T, typename U>
void graph_traverse(layer *root_node, T&& node_callback, U&& edge_callback) {
    std::unordered_set<layer*> visited;
    std::queue<layer*> S;

    S.push(root_node);

    while (!S.empty()) {
        layer *curr = S.front();
        S.pop();
        // a node may have been queued more than once via different
        // neighbours before its first visit; process it only once.
        if (visited.find(curr) != visited.end()) continue;
        visited.insert(curr);

        node_callback(*curr);

        auto edges = curr->next();
        for (auto e : edges) {
            if (e != nullptr)
                edge_callback(*e);
        }

        auto prev = curr->prev_nodes();
        for (auto p : prev) {
            // TODO(nyanp): refactoring
            layer* l = dynamic_cast<layer*>(p);
            if (visited.find(l) == visited.end()) {
                S.push(l);
            }
        }

        auto next = curr->next_nodes();
        for (auto n : next) {
            // TODO(nyanp): refactoring
            layer* l = dynamic_cast<layer*>(n);
            if (visited.find(l) == visited.end()) {
                S.push(l);
            }
        }
    }
}
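
// A minimal usage sketch for graph_traverse above, assuming `root` is any
// layer belonging to an already constructed network; the node callback
// receives each reachable layer and the edge callback each non-null edge:
//
//   graph_traverse(&root,
//       [](layer& l) { std::cout << l.layer_type() << "\n"; },
//       [](edge&)    { /* inspect edges here */ });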

}  // namespace tiny_dnn