tiny_dnn 1.0.0
A header only, dependency-free deep learning framework in C++11
network.h
/*
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <iostream>
#include <fstream>    // std::ifstream/std::ofstream (save()/load())
#include <sstream>    // std::stringstream (to_json()/from_json(), check_t())
#include <cstdio>     // fopen/fscanf (fast_load())
#include <cmath>      // std::sqrt/std::abs (calc_delta())
#include <cassert>
#include <memory>     // std::shared_ptr
#include <stdexcept>
#include <algorithm>
#include <iterator>
#include <iomanip>
#include <map>
#include <set>
#include <limits>
#include <string>
#include <vector>

#include "tiny_dnn/nodes.h"
#include "tiny_dnn/util/util.h"
#include "tiny_dnn/lossfunctions/loss_function.h"
#include "tiny_dnn/activations/activation_function.h"

namespace tiny_dnn {

enum class content_type {
    weights,
    model,
    weights_and_model
};

enum class file_format {
    binary,
    json
};

struct result {
    result() : num_success(0), num_total(0) {}

    float_t accuracy() const {
        return float_t(num_success * 100.0 / num_total);
    }

    template <typename Char, typename CharTraits>
    void print_summary(std::basic_ostream<Char, CharTraits>& os) const {
        os << "accuracy:" << accuracy()
           << "% (" << num_success << "/"
           << num_total << ")" << std::endl;
    }

    template <typename Char, typename CharTraits>
    void print_detail(std::basic_ostream<Char, CharTraits>& os) const {
        print_summary(os);
        auto all_labels = labels();

        os << std::setw(5) << "*" << " ";
        for (auto c : all_labels)
            os << std::setw(5) << c << " ";
        os << std::endl;

        for (auto r : all_labels) {
            os << std::setw(5) << r << " ";
            const auto row_iter = confusion_matrix.find(r);
            for (auto c : all_labels) {
                int count = 0;
                if (row_iter != confusion_matrix.end()) {
                    const auto& row = row_iter->second;
                    const auto col_iter = row.find(c);
                    if (col_iter != row.end()) {
                        count = col_iter->second;
                    }
                }
                os << std::setw(5) << count << " ";
            }
            os << std::endl;
        }
    }

    std::set<label_t> labels() const {
        std::set<label_t> all_labels;
        for (auto r : confusion_matrix) {
            all_labels.insert(r.first);
            for (auto c : r.second)
                all_labels.insert(c.first);
        }
        return all_labels;
    }

    int num_success;
    int num_total;
    std::map<label_t, std::map<label_t, int> > confusion_matrix;
};
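
// Editor's note: a minimal sketch (not part of the original header) showing
// how `result` is consumed. print_detail() prints the summary line followed
// by the confusion matrix, with predicted labels as rows and actual labels as
// columns (network::test() below increments confusion_matrix[predicted][actual]).
#if 0
void report(const result& res) {
    res.print_summary(std::cout);  // e.g. "accuracy:95% (95/100)"
    res.print_detail(std::cout);   // summary plus the confusion matrix
}
#endif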

enum grad_check_mode {
    GRAD_CHECK_ALL,
    GRAD_CHECK_RANDOM
};

template <typename NetType>
class network;

template <typename Layer>
network<sequential>& operator << (network<sequential>& n, Layer&& l);

void construct_graph(network<graph>& graph,
                     const std::vector<std::shared_ptr<layer>>& inputs,
                     const std::vector<std::shared_ptr<layer>>& outputs);
void construct_graph(network<graph>& graph,
                     const std::vector<layer*>& inputs,
                     const std::vector<layer*>& outputs);

/**
 * A model of neural networks in tiny-dnn.
 *
 * Two representations are available: network<sequential> describes the
 * network as a linked list in which each layer has at most one predecessor
 * and one successor, while network<graph> describes it as a computational
 * graph whose nodes are layers and whose directed edges carry tensors and
 * their gradients. Both share the same API except for construction.
 **/
template<typename NetType>
class network {
 public:
    typedef typename std::vector<layerptr_t>::iterator iterator;
    typedef typename std::vector<layerptr_t>::const_iterator const_iterator;

    explicit network(const std::string& name = "") : name_(name) {}

    /**
     * name of the network
     **/
    std::string name() const { return name_; }

    /**
     * explicitly initialize weights of all layers
     **/
    void init_weight() { net_.setup(true); }

    /**
     * executes forward-propagation and returns output
     **/
    vec_t predict(const vec_t& in) { return fprop(in); }

    /**
     * executes forward-propagation and returns output
     **/
    tensor_t predict(const tensor_t& in) { return fprop(in); }

    /**
     * executes forward-propagation and returns output
     **/
    std::vector<tensor_t> predict(const std::vector<tensor_t>& in) { return fprop(in); }

    /**
     * executes forward-propagation and returns maximum output
     **/
    float_t predict_max_value(const vec_t& in) {
        return fprop_max(in);
    }

    /**
     * executes forward-propagation and returns maximum output index
     **/
    label_t predict_label(const vec_t& in) {
        return fprop_max_index(in);
    }

    /**
     * executes forward-propagation and returns output
     *
     * @param in input value range (any sequence of float_t with begin()/end())
     **/
    template <typename Range>
    vec_t predict(const Range& in) {
        using std::begin;  // for ADL
        using std::end;
        return predict(vec_t(begin(in), end(in)));
    }
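
    // Editor's sketch (illustrative, not part of the original header): the
    // predict() overloads above accept a single flattened sample, a batch,
    // or any iterable range. The 0.5 fill value is arbitrary, and the range
    // example assumes <list> is available.
#if 0
    void predict_examples(network<sequential>& net) {
        vec_t sample(net.in_data_size(), 0.5);         // one flattened input
        vec_t out    = net.predict(sample);            // full output vector
        float_t best = net.predict_max_value(sample);  // highest activation
        label_t cls  = net.predict_label(sample);      // index of highest activation

        std::list<float_t> range(net.in_data_size(), 0.5);
        vec_t out2   = net.predict(range);             // any begin()/end() range works
    }
#endif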

    /**
     * trains the network for a fixed number of epochs (for classification task)
     *
     * Unlike fit, this overload takes label_t targets and converts each label
     * to a target vector internally; the output dimension of the last layer
     * must therefore be greater than the maximum class id.
     **/
    template <typename Error, typename Optimizer,
              typename OnBatchEnumerate, typename OnEpochEnumerate>
    bool train(Optimizer& optimizer,
               const std::vector<vec_t>& inputs,
               const std::vector<label_t>& class_labels,
               size_t batch_size,
               int epoch,
               OnBatchEnumerate on_batch_enumerate,
               OnEpochEnumerate on_epoch_enumerate,
               const bool reset_weights = false,
               const int n_threads = CNN_TASK_SIZE,
               const std::vector<vec_t>& t_cost = std::vector<vec_t>()) {
        std::vector<tensor_t> input_tensor, output_tensor, t_cost_tensor;
        normalize_tensor(inputs, input_tensor);
        normalize_tensor(class_labels, output_tensor);
        if (!t_cost.empty()) normalize_tensor(t_cost, t_cost_tensor);

        return fit<Error>(optimizer, input_tensor, output_tensor, batch_size,
                          epoch, on_batch_enumerate, on_epoch_enumerate,
                          reset_weights, n_threads, t_cost_tensor);
    }

    /**
     * trains the network for a fixed number of epochs to generate desired
     * output, invoking the given callbacks after every mini-batch and epoch
     **/
    template <typename Error, typename Optimizer,
              typename OnBatchEnumerate, typename OnEpochEnumerate,
              typename T, typename U>
    bool fit(Optimizer& optimizer,
             const std::vector<T>& inputs,
             const std::vector<U>& desired_outputs,
             size_t batch_size,
             int epoch,
             OnBatchEnumerate on_batch_enumerate,
             OnEpochEnumerate on_epoch_enumerate,
             const bool reset_weights = false,
             const int n_threads = CNN_TASK_SIZE,
             const std::vector<U>& t_cost = std::vector<U>()) {
        std::vector<tensor_t> input_tensor, output_tensor, t_cost_tensor;
        normalize_tensor(inputs, input_tensor);
        normalize_tensor(desired_outputs, output_tensor);
        if (!t_cost.empty()) normalize_tensor(t_cost, t_cost_tensor);

        return fit<Error>(optimizer, input_tensor, output_tensor, batch_size,
                          epoch, on_batch_enumerate, on_epoch_enumerate,
                          reset_weights, n_threads, t_cost_tensor);
    }

    /**
     * trains the network for a fixed number of epochs to generate desired
     * output (short version, without per-batch/per-epoch callbacks)
     **/
    template<typename Error, typename Optimizer, typename T, typename U>
    bool fit(Optimizer& optimizer,
             const std::vector<T>& inputs,
             const std::vector<U>& desired_outputs,
             size_t batch_size = 1,
             int epoch = 1) {
        return fit<Error>(optimizer, inputs, desired_outputs,
                          batch_size, epoch, nop, nop);
    }

    /**
     * trains the network for a fixed number of epochs
     * (classification task, without per-batch/per-epoch callbacks)
     **/
    template<typename Error, typename Optimizer>
    bool train(Optimizer& optimizer,
               const std::vector<vec_t>& inputs,
               const std::vector<label_t>& class_labels,
               size_t batch_size = 1,
               int epoch = 1) {
        return train<Error>(optimizer, inputs, class_labels,
                            batch_size, epoch, nop, nop);
    }

    /**
     * trains the network with targets given directly as vectors
     * (regression-style targets); equivalent to fit
     **/
    template<typename Error, typename Optimizer>
    bool train(Optimizer& optimizer,
               const std::vector<vec_t>& in,
               const std::vector<vec_t>& t,
               size_t batch_size = 1,
               int epoch = 1) {
        return fit<Error>(optimizer, in, t, batch_size, epoch, nop, nop);
    }
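
    // Editor's sketch (illustrative, not part of the original header): train()
    // versus fit() on a toy problem. adagrad, mse and cross_entropy are this
    // library's optimizer/loss types; the layer spelling
    // (fully_connected_layer<tan_h>) follows this version's API, and the data
    // and layer sizes are made up.
#if 0
    void training_example() {
        network<sequential> net;
        net << fully_connected_layer<tan_h>(2, 8)
            << fully_connected_layer<softmax>(8, 2);

        std::vector<vec_t>   in     = { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
        std::vector<label_t> labels = { 0, 1, 1, 0 };
        adagrad opt;

        // classification: labels are converted to target vectors internally,
        // so the last layer needs more outputs than the maximum class id
        net.train<cross_entropy>(opt, in, labels, 2 /*batch_size*/, 10 /*epoch*/);

        // regression-style targets are passed directly as vectors
        std::vector<vec_t> targets = { {1, 0}, {0, 1}, {0, 1}, {1, 0} };
        net.fit<mse>(opt, in, targets, 2, 10);
    }
#endif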

    /**
     * set the netphase to train or test
     **/
    void set_netphase(net_phase phase) {
        for (auto n : net_) {
            n->set_context(phase);
        }
    }

    /**
     * test and generate confusion-matrix for classification task
     **/
    result test(const std::vector<vec_t>& in, const std::vector<label_t>& t) {
        result test_result;
        set_netphase(net_phase::test);
        for (size_t i = 0; i < in.size(); i++) {
            const label_t predicted = fprop_max_index(in[i]);
            const label_t actual = t[i];

            if (predicted == actual) test_result.num_success++;
            test_result.num_total++;
            test_result.confusion_matrix[predicted][actual]++;
        }
        return test_result;
    }

    /**
     * generate output for each input
     **/
    std::vector<vec_t> test(const std::vector<vec_t>& in) {
        std::vector<vec_t> test_result(in.size());
        set_netphase(net_phase::test);
        for (size_t i = 0; i < in.size(); i++) {
            test_result[i] = predict(in[i]);
        }
        return test_result;
    }

    /**
     * calculate loss value (the smaller, the better) for regression task
     **/
    template <typename E>
    float_t get_loss(const std::vector<vec_t>& in,
                     const std::vector<vec_t>& t) {
        float_t sum_loss = float_t(0);

        for (size_t i = 0; i < in.size(); i++) {
            const vec_t predicted = predict(in[i]);
            sum_loss += E::f(predicted, t[i]);
        }
        return sum_loss;
    }
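
    // Editor's sketch (not part of the original header): get_loss() returns
    // the loss summed over all samples, so dividing by the dataset size gives
    // a size-independent figure. mse is the library's mean-squared-error functor.
#if 0
    float_t mean_loss(network<sequential>& net,
                      const std::vector<vec_t>& in,
                      const std::vector<vec_t>& t) {
        return net.get_loss<mse>(in, t) / float_t(in.size());
    }
#endif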

    /**
     * calculate loss value (the smaller, the better) for regression task
     **/
    template <typename E, typename T>
    float_t get_loss(const std::vector<T>& in, const std::vector<tensor_t>& t) {
        float_t sum_loss = float_t(0);
        std::vector<tensor_t> in_tensor;
        normalize_tensor(in, in_tensor);

        for (size_t i = 0; i < in.size(); i++) {
            const tensor_t predicted = predict(in_tensor[i]);
            for (size_t j = 0; j < predicted.size(); j++) {
                sum_loss += E::f(predicted[j], t[i][j]);
            }
        }
        return sum_loss;
    }

    /**
     * checking gradients calculated by bprop
     * detail information:
     * http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
     **/
    template <typename E>
    bool gradient_check(const std::vector<tensor_t>& in,
                        const std::vector<std::vector<label_t>>& t,
                        float_t eps, grad_check_mode mode) {
        assert(in.size() == t.size());

        std::vector<tensor_t> v(t.size());
        const serial_size_t sample_count = static_cast<serial_size_t>(t.size());
        for (serial_size_t sample = 0; sample < sample_count; ++sample) {
            net_.label2vec(&t[sample][0],
                           static_cast<serial_size_t>(t[sample].size()),
                           &v[sample]);
        }

        // layers without trainable weights (e.g. the input layer) are skipped
        for (auto current : net_) {
            if (current->weights().size() < 2) {
                continue;
            }
            vec_t& w = *current->weights()[0];
            vec_t& b = *current->weights()[1];
            tensor_t& dw = (*current->weights_grads()[0]);
            tensor_t& db = (*current->weights_grads()[1]);

            if (w.empty()) continue;

            switch (mode) {
            case GRAD_CHECK_ALL:
                for (int i = 0; i < static_cast<int>(w.size()); i++)
                    if (!calc_delta<E>(in, v, w, dw, i, eps)) {
                        return false;
                    }
                for (int i = 0; i < static_cast<int>(b.size()); i++)
                    if (!calc_delta<E>(in, v, b, db, i, eps)) {
                        return false;
                    }
                break;
            case GRAD_CHECK_RANDOM:
                for (int i = 0; i < 10; i++)
                    if (!calc_delta<E>(in, v, w, dw, uniform_idx(w), eps)) {
                        return false;
                    }
                for (int i = 0; i < 10; i++)
                    if (!calc_delta<E>(in, v, b, db, uniform_idx(b), eps)) {
                        return false;
                    }
                break;
            default:
                throw nn_error("unknown grad-check type");
            }
        }
        return true;
    }
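
    // Editor's sketch (not part of the original header): a typical invocation.
    // GRAD_CHECK_RANDOM keeps the check cheap; the single one-channel sample
    // matches the channel-count assertion in calc_delta(), and eps = 1e-2 is
    // a hypothetical tolerance.
#if 0
    bool check_gradients(network<sequential>& net) {
        std::vector<tensor_t> in = { tensor_t{ vec_t(net.in_data_size(), 0.5) } };
        std::vector<std::vector<label_t>> t = { { 1 } };
        return net.gradient_check<mse>(in, t, 1e-2, GRAD_CHECK_RANDOM);
    }
#endif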

    /**
     * return number of layers
     **/
    size_t layer_size() const {
        return net_.size();
    }

    /**
     * alias of layer_size()
     **/
    size_t depth() const {
        return layer_size();
    }

    /**
     * return raw pointer of index-th layer
     **/
    const layer* operator[] (size_t index) const {
        return net_[index];
    }

    /**
     * return raw pointer of index-th layer
     **/
    layer* operator[] (size_t index) {
        return net_[index];
    }

    /**
     * return index-th layer as <T>
     * throw nn_error if index-th layer cannot be converted to T
     **/
    template <typename T>
    const T& at(size_t index) const {
        return net_.template at<T>(index);
    }

    template <typename T>
    T& at(size_t index) {
        return net_.template at<T>(index);
    }

    /**
     * return total number of elements of output data
     **/
    serial_size_t out_data_size() const {
        return net_.out_data_size();
    }

    /**
     * return total number of elements of input data
     **/
    serial_size_t in_data_size() const {
        return net_.in_data_size();
    }

    /**
     * set weight initializer to all layers
     **/
    template <typename WeightInit>
    network& weight_init(const WeightInit& f) {
        auto ptr = std::make_shared<WeightInit>(f);
        for (auto& l : net_)
            l->weight_init(ptr);
        return *this;
    }

    /**
     * set bias initializer to all layers
     **/
    template <typename BiasInit>
    network& bias_init(const BiasInit& f) {
        auto ptr = std::make_shared<BiasInit>(f);
        for (auto& l : net_)
            l->bias_init(ptr);
        return *this;
    }
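
    // Editor's sketch (not part of the original header): both initializer
    // setters return *this, so they chain. weight_init::xavier and
    // weight_init::constant are the library's stock initializers; calling
    // init_weight() afterwards re-initializes all layers with them.
#if 0
    void init_example(network<sequential>& net) {
        net.weight_init(weight_init::xavier())
           .bias_init(weight_init::constant(0));
        net.init_weight();
    }
#endif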

    /**
     * returns true if the two networks have (almost) the same weights,
     * i.e. every pair of corresponding weights differs by less than eps
     **/
    template <typename T>
    bool has_same_weights(const network<T>& rhs, float_t eps) const {
        auto first1 = net_.begin();
        auto first2 = rhs.net_.begin();
        auto last1 = net_.end();
        auto last2 = rhs.net_.end();

        for (; first1 != last1 && first2 != last2; ++first1, ++first2)
            if (!(*first1)->has_same_weights(**first2, eps)) return false;
        return true;
    }

    iterator begin() { return net_.begin(); }
    iterator end() { return net_.end(); }
    const_iterator begin() const { return net_.begin(); }
    const_iterator end() const { return net_.end(); }

    void load(const std::string& filename,
              content_type what = content_type::weights_and_model,
              file_format format = file_format::binary) {
        std::ifstream ifs(filename.c_str(), std::ios::binary | std::ios::in);
        if (ifs.fail() || ifs.bad())
            throw nn_error("failed to open:" + filename);

        switch (format) {
        case file_format::binary:
        {
            cereal::BinaryInputArchive bi(ifs);
            from_archive(bi, what);
        }
            break;
        case file_format::json:
        {
            cereal::JSONInputArchive ji(ifs);
            from_archive(ji, what);
        }
            break;
        default:
            throw nn_error("invalid serialization format");
        }
    }

    void save(const std::string& filename,
              content_type what = content_type::weights_and_model,
              file_format format = file_format::binary) const {
        std::ofstream ofs(filename.c_str(), std::ios::binary | std::ios::out);
        if (ofs.fail() || ofs.bad())
            throw nn_error("failed to open:" + filename);

        switch (format) {
        case file_format::binary:
        {
            cereal::BinaryOutputArchive bo(ofs);
            to_archive(bo, what);
        }
            break;
        case file_format::json:
        {
            cereal::JSONOutputArchive jo(ofs);
            to_archive(jo, what);
        }
            break;
        default:
            throw nn_error("invalid serialization format");
        }
    }
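
    // Editor's sketch (not part of the original header): round-tripping a
    // network through the file-based save()/load() pair. content_type picks
    // weights, architecture, or both; file_format picks cereal's binary or
    // JSON archive. The file names are placeholders.
#if 0
    void roundtrip(network<sequential>& net) {
        net.save("net.bin");  // defaults: weights_and_model, binary
        net.save("net.json", content_type::weights_and_model, file_format::json);

        network<sequential> restored;
        restored.load("net.bin");  // throws nn_error if the file cannot be opened
    }
#endif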

    /**
     * save the network architecture as json string
     **/
    std::string to_json() const {
        std::stringstream ss;
        {
            cereal::JSONOutputArchive oa(ss);
            to_archive(oa, content_type::model);
        }
        return ss.str();
    }

    /**
     * load the network architecture from json string
     **/
    void from_json(const std::string& json_string) {
        std::stringstream ss;
        ss << json_string;
        cereal::JSONInputArchive ia(ss);
        from_archive(ia, content_type::model);
    }
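
    // Editor's sketch (not part of the original header): to_json()/from_json()
    // carry only the architecture (content_type::model); weights are not
    // included and must be transferred separately.
#if 0
    void clone_architecture(const network<sequential>& src) {
        std::string json = src.to_json();
        network<sequential> dst;
        dst.from_json(json);  // same layers; weights are not restored
    }
#endif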

    /// save the network weights to an output stream in text format
    void save(std::ostream& os) const {
        os.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
        net_.save(os);
    }

    /// load the network weights from an input stream in text format
    void load(std::istream& is) {
        is.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
        net_.load(is);
    }

    /**
     * load network weights from filepath, 30 times faster than stream reading
     **/
    void fast_load(const char* filepath) {
        FILE* stream = fopen(filepath, "r");
        if (stream == nullptr)  // fopen, unlike the stream loaders, does not throw
            throw nn_error("failed to open:" + std::string(filepath));
        std::vector<float_t> data;
        double temp;
        while (fscanf(stream, "%lf", &temp) > 0)
            data.push_back(float_t(temp));
        fclose(stream);

        net_.load(data);
    }

    template <typename OutputArchive>
    void to_archive(OutputArchive& ar,
                    content_type what = content_type::weights_and_model) const {
        if (what == content_type::model ||
            what == content_type::weights_and_model) {
            net_.save_model(ar);
        }
        if (what == content_type::weights ||
            what == content_type::weights_and_model) {
            net_.save_weights(ar);
        }
    }

    template <typename InputArchive>
    void from_archive(InputArchive& ar,
                      content_type what = content_type::weights_and_model) {
        if (what == content_type::model ||
            what == content_type::weights_and_model) {
            net_.load_model(ar);
        }
        if (what == content_type::weights ||
            what == content_type::weights_and_model) {
            net_.load_weights(ar);
        }
    }

 protected:
    // note: the idx parameter present in some revisions is gone here, since
    // none of the fprop() overloads below take an index
    float_t fprop_max(const vec_t& in) {
        const vec_t& prediction = fprop(in);
        return *std::max_element(std::begin(prediction), std::end(prediction));
    }

    label_t fprop_max_index(const vec_t& in) {
        return label_t(max_index(fprop(in)));
    }

 private:
    template <typename Layer>
    friend network<sequential>& operator << (network<sequential>& n, Layer&& l);

    friend void construct_graph(network<graph>& graph,
                                const std::vector<std::shared_ptr<layer>>& inputs,
                                const std::vector<std::shared_ptr<layer>>& outputs);

    friend void construct_graph(network<graph>& graph,
                                const std::vector<layer*>& inputs,
                                const std::vector<layer*>& outputs);

    template <typename Error, typename Optimizer,
              typename OnBatchEnumerate, typename OnEpochEnumerate>
    bool fit(Optimizer& optimizer,
             const std::vector<tensor_t>& inputs,
             const std::vector<tensor_t>& desired_outputs,
             size_t batch_size,
             int epoch,
             OnBatchEnumerate on_batch_enumerate,
             OnEpochEnumerate on_epoch_enumerate,
             const bool reset_weights = false,
             const int n_threads = CNN_TASK_SIZE,
             const std::vector<tensor_t>& t_cost = std::vector<tensor_t>()) {
        // check_training_data(in, t);
        check_target_cost_matrix(desired_outputs, t_cost);
        set_netphase(net_phase::train);
        net_.setup(reset_weights);

        for (auto n : net_)
            n->set_parallelize(true);
        optimizer.reset();
        for (int iter = 0; iter < epoch; iter++) {
            for (size_t i = 0; i < inputs.size(); i += batch_size) {
                train_once<Error>(optimizer, &inputs[i], &desired_outputs[i],
                                  static_cast<int>(std::min(batch_size, inputs.size() - i)),
                                  n_threads,
                                  get_target_cost_sample_pointer(t_cost, i));
                on_batch_enumerate();

                /* if (i % 100 == 0 && layers_.is_exploded()) {
                    std::cout << "[Warning]Detected infinite value in weight. stop learning." << std::endl;
                    return false;
                } */
            }
            on_epoch_enumerate();
        }
        set_netphase(net_phase::test);
        return true;
    }

    /**
     * train on one minibatch
     *
     * @param size the number of data points in this batch
     **/
    template <typename E, typename Optimizer>
    void train_once(Optimizer& optimizer,
                    const tensor_t* in,
                    const tensor_t* t,
                    int size,
                    const int nbThreads,
                    const tensor_t* t_cost) {
        if (size == 1) {
            bprop<E>(fprop(in[0]), t[0], t_cost ? t_cost[0] : tensor_t());
            net_.update_weights(&optimizer, 1);
        } else {
            train_onebatch<E>(optimizer, in, t, size, nbThreads, t_cost);
        }
    }

    /**
     * trains on one minibatch, i.e. runs forward and backward propagation
     * to calculate the gradient of the loss function with respect to the
     * network parameters, then calls the optimizer to update the weights
     *
     * @param batch_size the number of data points in this batch
     **/
    template <typename E, typename Optimizer>
    void train_onebatch(Optimizer& optimizer,
                        const tensor_t* in,
                        const tensor_t* t,
                        int batch_size,
                        const int num_tasks,
                        const tensor_t* t_cost) {
        std::vector<tensor_t> in_batch(&in[0], &in[0] + batch_size);
        std::vector<tensor_t> t_batch(&t[0], &t[0] + batch_size);
        std::vector<tensor_t> t_cost_batch = t_cost
            ? std::vector<tensor_t>(&t_cost[0], &t_cost[0] + batch_size)
            : std::vector<tensor_t>();

        bprop<E>(fprop(in_batch), t_batch, t_cost_batch);
        net_.update_weights(&optimizer, batch_size);
    }

    vec_t fprop(const vec_t& in) {
        if (in.size() != (size_t)in_data_size())
            data_mismatch(**net_.begin(), in);
#if 0
        return fprop(std::vector<vec_t>{ in })[0];
#else
        // a workaround to reduce memory consumption by skipping the wrapper function
        std::vector<tensor_t> a(1);
        a[0].emplace_back(in);
        return fprop(a)[0][0];
#endif
    }

    // convenience wrapper for the function below
    std::vector<vec_t> fprop(const std::vector<vec_t>& in) {
        return fprop(std::vector<tensor_t>{ in })[0];
    }

    std::vector<tensor_t> fprop(const std::vector<tensor_t>& in) {
        return net_.forward(in);
    }

//    template <typename E>
//    float_t get_loss(const vec_t& out, const vec_t& t) {
//        assert(out.size() == t.size());
//        return E::f(out, t);
//    }

    template <typename E>
    bool calc_delta(const std::vector<tensor_t>& in,
                    const std::vector<tensor_t>& v,
                    vec_t& w, tensor_t& dw, int check_index, double eps) {
        static const float_t delta = std::sqrt(
            std::numeric_limits<float_t>::epsilon());

        assert(in.size() == v.size());

        const serial_size_t sample_count = static_cast<serial_size_t>(in.size());

        assert(sample_count > 0);

        // at the moment, channel count must be 1
        assert(in[0].size() == 1);
        assert(v[0].size() == 1);

        // clear previous results, if any
        for (vec_t& dw_sample : dw) {
            std::fill(dw_sample.begin(), dw_sample.end(), float_t(0));
        }

        // calculate dE/dw numerically, by central difference:
        // (E(w + delta) - E(w - delta)) / (2 * delta)
        float_t prev_w = w[check_index];

        float_t f_p = float_t(0);
        w[check_index] = prev_w + delta;
        for (serial_size_t i = 0; i < sample_count; i++) {
            f_p += get_loss<E>(in[i], v[i]);
        }

        float_t f_m = float_t(0);
        w[check_index] = prev_w - delta;
        for (serial_size_t i = 0; i < sample_count; i++) {
            f_m += get_loss<E>(in[i], v[i]);
        }

        float_t delta_by_numerical = (f_p - f_m) / (float_t(2) * delta);
        w[check_index] = prev_w;

        // calculate dE/dw by backpropagation
        bprop<E>(fprop(in), v, std::vector<tensor_t>());

        float_t delta_by_bprop = 0;
        for (serial_size_t sample = 0; sample < sample_count; ++sample) {
            delta_by_bprop += dw[sample][check_index];
        }
        net_.clear_grads();

        return std::abs(delta_by_bprop - delta_by_numerical) <= eps;
    }

    // convenience wrapper for the function below
    template <typename E>
    void bprop(const std::vector<vec_t>& out,
               const std::vector<vec_t>& t, const std::vector<vec_t>& t_cost) {
        bprop<E>(std::vector<tensor_t>{out},
                 std::vector<tensor_t>{t}, std::vector<tensor_t>{t_cost});
    }

    template <typename E>
    void bprop(const std::vector<tensor_t>& out,
               const std::vector<tensor_t>& t,
               const std::vector<tensor_t>& t_cost) {
        std::vector<tensor_t> delta = gradient<E>(out, t, t_cost);
        net_.backward(delta);
    }

    void check_t(size_t i, label_t t, serial_size_t dim_out) {
        if (t >= dim_out) {
            std::ostringstream os;
            os << format_str("t[%u]=%u, dim(net output)=%u\n", i, t, dim_out);
            os << "in classification task, dim(net output) ";
            os << "must be greater than max class id.\n";
            if (dim_out == 1) {
                os << "\n(for regression, use vector<vec_t> ";
                os << "instead of vector<label_t> for training signal)\n";
            }

            throw nn_error("output dimension mismatch!\n " + os.str());
        }
    }

    void check_t(size_t i, const vec_t& t, serial_size_t dim_out) {
        if (t.size() != dim_out) {
            throw nn_error(format_str(
                "output dimension mismatch!\n dim(target[%u])=%u, "
                "dim(network output)=%u", i, t.size(), dim_out));
        }
    }

    template <typename T>
    void check_training_data(const std::vector<vec_t>& in,
                             const std::vector<T>& t) {
        serial_size_t dim_in = in_data_size();
        serial_size_t dim_out = out_data_size();

        if (in.size() != t.size()) {
            throw nn_error("size of training data must be equal to label data");
        }

        size_t num = in.size();

        for (size_t i = 0; i < num; i++) {
            if (in[i].size() != dim_in) {
                throw nn_error(format_str(
                    "input dimension mismatch!\n dim(data[%u])=%d, "
                    "dim(network input)=%u", i, in[i].size(), dim_in));
            }
            check_t(i, t[i], dim_out);
        }
    }

    void check_target_cost_matrix(const std::vector<tensor_t>& t,
                                  const std::vector<tensor_t>& t_cost) {
        if (!t_cost.empty()) {
            if (t.size() != t_cost.size()) {
                throw nn_error("if target cost is supplied, "
                               "its length must equal that of target data");
            }

            for (size_t i = 0, end = t.size(); i < end; i++) {
                check_target_cost_element(t[i], t_cost[i]);
            }
        }
    }

    // regression
    void check_target_cost_element(const vec_t& t, const vec_t& t_cost) {
        if (t.size() != t_cost.size()) {
            throw nn_error("if target cost is supplied for a regression task, "
                           "its shape must be identical to the target data");
        }
    }

    void check_target_cost_element(const tensor_t& t, const tensor_t& t_cost) {
        if (t.size() != t_cost.size()) {
            throw nn_error("if target cost is supplied for a regression task, "
                           "its shape must be identical to the target data");
        }
        for (size_t i = 0; i < t.size(); i++)
            check_target_cost_element(t[i], t_cost[i]);
    }

    const tensor_t* get_target_cost_sample_pointer(
        const std::vector<tensor_t>& t_cost, size_t i) {
        if (!t_cost.empty()) {
            assert(i < t_cost.size());
            return &(t_cost[i]);
        } else {
            return nullptr;
        }
    }

    void normalize_tensor(const std::vector<tensor_t>& inputs,
                          std::vector<tensor_t>& normalized) {
        normalized = inputs;
    }

    void normalize_tensor(const std::vector<vec_t>& inputs,
                          std::vector<tensor_t>& normalized) {
        normalized.reserve(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
            normalized.emplace_back(tensor_t{ inputs[i] });
    }

    void normalize_tensor(const std::vector<label_t>& inputs,
                          std::vector<tensor_t>& normalized) {
        std::vector<vec_t> vec;
        normalized.reserve(inputs.size());
        net_.label2vec(&inputs[0], static_cast<serial_size_t>(inputs.size()), &vec);
        normalize_tensor(vec, normalized);
    }

    std::string name_;
    NetType net_;
};

/**
 * cuts an image into overlapping square patches, to be fed to test()
 *
 * @param data      pointer to the raw image, row-major, rows x cols
 * @param rows      image height in pixels
 * @param cols      image width in pixels
 * @param sizepatch patch side length; each sample has sizepatch*sizepatch elements
 * @param step      stride between consecutive patches
 * @return vector of flattened patches (one vec_t per patch)
 **/
inline std::vector<vec_t> image2vec(const float_t* data,
                                    const unsigned int rows,
                                    const unsigned int cols,
                                    const unsigned int sizepatch,
                                    const unsigned int step = 1) {
    assert(step > 0);
    std::vector<vec_t> res((cols-sizepatch) * (rows-sizepatch) / (step*step),
                           vec_t(sizepatch*sizepatch));
    for_i((cols-sizepatch)*(rows-sizepatch)/(step*step), [&](int count) {
        const int j = step*(count / ((cols-sizepatch)/step));
        const int i = step*(count % ((cols-sizepatch)/step));

        // vec_t sample(sizepatch*sizepatch);

        if (i+sizepatch < cols && j+sizepatch < rows) {
            for (unsigned int k = 0; k < sizepatch*sizepatch; k++) {
            // for_i(sizepatch*sizepatch, [&](int k) {
                unsigned int y = k / sizepatch + j;
                unsigned int x = k % sizepatch + i;
                res[count][k] = data[x+y*cols];
            }
            // });
            // res[count] = (sample);
        }
    });
    return res;
}
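
// Editor's sketch (not part of the original header): image2vec() slides a
// sizepatch x sizepatch window over a row-major image with the given stride
// and flattens each patch row by row (k / sizepatch is the row offset inside
// the patch, k % sizepatch the column offset). Image dimensions are made up.
#if 0
void patch_example() {
    const unsigned int rows = 32, cols = 32, patch = 8, step = 4;
    std::vector<float_t> img(rows * cols, float_t(0));
    std::vector<vec_t> patches = image2vec(img.data(), rows, cols, patch, step);
    // each element of `patches` holds patch*patch pixels, ready for test()
}
#endif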

template <typename Layer>
network<sequential>& operator << (network<sequential>& n, Layer&& l) {
    n.net_.add(std::forward<Layer>(l));
    return n;
}

template <typename NetType, typename Char, typename CharTraits>
std::basic_ostream<Char, CharTraits>& operator << (std::basic_ostream<Char, CharTraits>& os,
                                                   const network<NetType>& n) {
    n.save(os);
    return os;
}

template <typename NetType, typename Char, typename CharTraits>
std::basic_istream<Char, CharTraits>& operator >> (std::basic_istream<Char, CharTraits>& is,
                                                   network<NetType>& n) {
    n.load(is);
    return is;
}

inline void construct_graph(network<graph>& graph,
                            const std::vector<layer*>& inputs,
                            const std::vector<layer*>& outputs) {
    graph.net_.construct(inputs, outputs);
}

inline void construct_graph(network<graph>& graph,
                            const std::vector<std::shared_ptr<layer>>& inputs,
                            const std::vector<std::shared_ptr<layer>>& outputs) {
    std::vector<layer*> in_ptr, out_ptr;
    auto shared2ptr = [](std::shared_ptr<layer> l) { return l.get(); };

    std::transform(inputs.begin(), inputs.end(),
                   std::back_inserter(in_ptr), shared2ptr);
    std::transform(outputs.begin(), outputs.end(),
                   std::back_inserter(out_ptr), shared2ptr);

    graph.net_.construct(in_ptr, out_ptr);
}
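
// Editor's sketch (not part of the original header): wiring a network<graph>
// from explicit input and output layers. The layer pointers are hypothetical
// and assumed to be connected to each other beforehand (e.g. via operator<<).
#if 0
void build_graph(layer* in1, layer* in2, layer* out) {
    network<graph> net;
    construct_graph(net,
                    std::vector<layer*>{ in1, in2 },
                    std::vector<layer*>{ out });
}
#endif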

}  // namespace tiny_dnn