678 os.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
// Deserializes network state from the given input stream.
// NOTE(review): fragmentary view — intermediate statements of the body
// are not visible here.
683 void load(std::istream&
is) {
// Use the same stream precision as save() so serialized float_t values
// round-trip without loss.
684 is.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
// Scratch buffer, presumably filled from `is` below — elided lines; confirm.
694 std::vector<float_t> data;
// Serializes this network to an output archive. `what` selects whether
// the model structure, the weights, or both are written (defaults to
// both). NOTE(review): the first signature line and the model-saving
// statement fall outside this view.
703 template <
typename OutputArchive>
705 content_type what = content_type::weights_and_model)
const {
706 if (what == content_type::model ||
707 what == content_type::weights_and_model) {
// (model-structure serialization happens here — elided line)
710 if (what == content_type::weights ||
711 what == content_type::weights_and_model) {
712 net_.save_weights(ar);
// Deserializes this network from an input archive; mirror of
// to_archive(). `what` selects model structure, weights, or both.
// NOTE(review): the model-loading statement is on an elided line.
716 template <
typename InputArchive>
717 void from_archive(InputArchive& ar,
718 content_type what = content_type::weights_and_model) {
719 if (what == content_type::model ||
720 what == content_type::weights_and_model) {
// (model-structure deserialization happens here — elided line)
723 if (what == content_type::weights ||
724 what == content_type::weights_and_model) {
725 net_.load_weights(ar);
// Runs a forward pass on `in` and returns the largest output
// activation (e.g. the top class score). `idx` is forwarded to
// fprop — presumably selects a data index within the sample; confirm.
730 float_t fprop_max(
const vec_t& in,
int idx = 0) {
731 const vec_t& prediction = fprop(in, idx);
// Assumes the prediction vector is non-empty — dereferencing
// max_element of an empty range would be UB. TODO confirm upstream
// guarantee.
732 return *std::max_element(std::begin(prediction), std::end(prediction));
// Forward pass returning the index of the strongest output unit,
// i.e. the predicted class label for `in`.
735 label_t fprop_max_index(
const vec_t& in) {
736 return label_t(max_index(fprop(in)));
// Friends: the layer-appending operator<< and both construct_graph
// overloads need access to the private net_ member.
740 template <
typename Layer>
741 friend network<sequential>& operator << (network<sequential>& n, Layer&& l);
743 friend void construct_graph(network<graph>& graph,
744 const std::vector<std::shared_ptr<layer>>& inputs,
745 const std::vector<std::shared_ptr<layer>>& outputs);
747 friend void construct_graph(network<graph>& graph,
748 const std::vector<layer*>& inputs,
749 const std::vector<layer*>& outputs);
// Core training loop: iterates over the data set epoch-by-epoch in
// mini-batches, running one optimizer update per batch via
// train_once<Error>() and firing the user callbacks after each batch
// and each epoch. `t_cost` optionally weights per-sample loss.
// NOTE(review): the `epoch` and `batch_size` parameters referenced in
// the loops are declared on elided signature lines; the return
// statement is also outside this view.
751 template <
typename Error,
typename Optimizer,
752 typename OnBatchEnumerate,
typename OnEpochEnumerate>
753 bool fit(Optimizer& optimizer,
754 const std::vector<tensor_t>& inputs,
755 const std::vector<tensor_t>& desired_outputs,
758 OnBatchEnumerate on_batch_enumerate,
759 OnEpochEnumerate on_epoch_enumerate,
760 const bool reset_weights =
false,
761 const int n_threads = CNN_TASK_SIZE,
762 const std::vector<tensor_t>& t_cost = std::vector<tensor_t>()) {
// Validate that the optional cost matrix matches the target data.
764 check_target_cost_matrix(desired_outputs, t_cost);
// Allocate/initialize layer state; optionally re-randomize weights.
766 net_.setup(reset_weights);
769 n->set_parallelize(true);
771 for (
int iter = 0; iter < epoch; iter++) {
772 for (
size_t i = 0; i < inputs.size(); i += batch_size) {
// The final batch may be short: min(batch_size, samples remaining).
773 train_once<Error>(optimizer, &inputs[i], &desired_outputs[i],
774 static_cast<int>(std::min(batch_size, inputs.size() - i)),
776 get_target_cost_sample_pointer(t_cost, i));
777 on_batch_enumerate();
784 on_epoch_enumerate();
// Trains on a span of samples: the single-sample path does an
// immediate forward/backward/update, while larger spans delegate to
// train_onebatch<E>(). NOTE(review): the parameter list (in, t, size,
// nbThreads) and the size==1 branch header are on elided lines.
795 template <
typename E,
typename Optimizer>
796 void train_once(Optimizer& optimizer,
801 const tensor_t* t_cost) {
// Single-sample path: backprop loss gradient, then apply one update.
803 bprop<E>(fprop(in[0]), t[0], t_cost ? t_cost[0] : tensor_t());
804 net_.update_weights(&optimizer, 1);
// Mini-batch path (else-branch — its header is elided).
806 train_onebatch<E>(optimizer, in, t, size, nbThreads, t_cost);
// Performs one forward/backward pass over a mini-batch and applies a
// single optimizer update scaled by batch_size. NOTE(review): the
// in/t/batch_size/num_tasks parameters are declared on elided lines.
817 template <
typename E,
typename Optimizer>
818 void train_onebatch(Optimizer& optimizer,
823 const tensor_t* t_cost) {
// Copy the raw sample ranges into contiguous batch vectors; the cost
// batch stays empty when no per-sample costs were supplied.
824 std::vector<tensor_t> in_batch(&in[0], &in[0] + batch_size);
825 std::vector<tensor_t> t_batch(&t[0], &t[0] + batch_size);
826 std::vector<tensor_t> t_cost_batch = t_cost
827 ? std::vector<tensor_t>(&t_cost[0], &t_cost[0] + batch_size)
828 : std::vector<tensor_t>();
830 bprop<E>(fprop(in_batch), t_batch, t_cost_batch);
831 net_.update_weights(&optimizer, batch_size);
// Forward pass for a single sample vector. NOTE(review): the two
// return paths below look mutually exclusive — elided lines likely
// carry preprocessor/branch markers selecting one of them; confirm
// against the full file before editing.
834 vec_t fprop(
const vec_t& in) {
// Throws/report on input-size mismatch against the first layer.
836 data_mismatch(**net_.begin(), in);
838 return fprop(std::vector<vec_t>{ in })[0];
// Alternate path: wrap the sample directly in a tensor batch of one.
841 std::vector<tensor_t> a(1);
842 a[0].emplace_back(in);
843 return fprop(a)[0][0];
// Forward pass for a batch of single-channel samples: wraps them in a
// tensor batch and unwraps the first (only) channel of the result.
848 std::vector<vec_t> fprop(
const std::vector<vec_t>& in) {
849 return fprop(std::vector<tensor_t>{ in })[0];
// Forward pass for a full tensor batch — thin wrapper over the
// underlying network container's forward().
852 std::vector<tensor_t> fprop(
const std::vector<tensor_t>& in) {
853 return net_.forward(in);
// Gradient check for a single weight w[check_index]: compares the
// central-difference numerical derivative of the loss E against the
// gradient accumulated by backprop into dw, returning true when they
// agree within `eps`.
862 template <
typename E>
863 bool calc_delta(
const std::vector<tensor_t>& in,
864 const std::vector<tensor_t>& v,
865 vec_t& w, tensor_t& dw,
int check_index,
double eps) {
// Perturbation step: sqrt(machine epsilon) balances truncation vs
// round-off error in the finite difference.
866 static const float_t delta = std::sqrt(
867 std::numeric_limits<float_t>::epsilon());
869 assert(in.size() == v.size());
871 const serial_size_t sample_count =
static_cast<serial_size_t
>(in.size());
873 assert(sample_count > 0);
// Only single-channel samples are supported here.
876 assert(in[0].size() == 1);
877 assert(v[0].size() == 1);
// Clear accumulated gradients before the backprop pass below.
880 for (vec_t& dw_sample : dw) {
881 std::fill(dw_sample.begin(), dw_sample.end(), float_t(0));
885 float_t prev_w = w[check_index];
// f(w + delta), summed over all samples.
887 float_t f_p = float_t(0);
888 w[check_index] = prev_w + delta;
889 for (serial_size_t i = 0; i < sample_count; i++) {
890 f_p += get_loss<E>(in[i], v[i]);
// f(w - delta), summed over all samples.
893 float_t f_m = float_t(0);
894 w[check_index] = prev_w - delta;
895 for (serial_size_t i = 0; i < sample_count; i++) {
896 f_m += get_loss<E>(in[i], v[i]);
// Central difference: (f(w+d) - f(w-d)) / (2d).
899 float_t delta_by_numerical = (f_p - f_m) / (float_t(2) * delta);
// Restore the weight before running backprop.
900 w[check_index] = prev_w;
903 bprop<E>(fprop(in), v, std::vector<tensor_t>());
// Sum per-sample backprop gradients for the checked weight.
905 float_t delta_by_bprop = 0;
906 for (serial_size_t sample = 0; sample < sample_count; ++sample) {
907 delta_by_bprop += dw[sample][check_index];
911 return std::abs(delta_by_bprop - delta_by_numerical) <= eps;
// Backprop convenience overload for single-channel batches: wraps the
// vec_t batches into tensor batches and forwards to the tensor bprop.
915 template <
typename E>
916 void bprop(
const std::vector<vec_t>& out,
917 const std::vector<vec_t>& t,
const std::vector<vec_t>& t_cost) {
918 bprop<E>(std::vector<tensor_t>{out},
919 std::vector<tensor_t>{t}, std::vector<tensor_t>{t_cost});
// Backward pass: computes the loss gradient of E w.r.t. the network
// output (optionally weighted by t_cost) and propagates it through
// the layers.
922 template <
typename E>
923 void bprop(
const std::vector<tensor_t>& out,
924 const std::vector<tensor_t>& t,
925 const std::vector<tensor_t>& t_cost) {
926 std::vector<tensor_t> delta = gradient<E>(out, t, t_cost);
927 net_.backward(delta);
// Validates a classification label against the network's output
// dimension, throwing nn_error with a diagnostic message when the
// label id cannot be represented. NOTE(review): the guard condition
// (presumably t >= dim_out) is on an elided line.
930 void check_t(
size_t i, label_t t, serial_size_t dim_out) {
932 std::ostringstream os;
933 os << format_str(
"t[%u]=%u, dim(net output)=%u\n", i, t, dim_out);
934 os <<
"in classification task, dim(net output) ";
935 os <<
"must be greater than max class id.\n";
937 os <<
"\n(for regression, use vector<vec_t> ";
938 os <<
"instead of vector<label_t> for training signal)\n";
941 throw nn_error(
"output dimension mismatch!\n " + os.str());
// Validates a regression target vector: its size must equal the
// network's output dimension.
945 void check_t(
size_t i,
const vec_t& t, serial_size_t dim_out) {
946 if (t.size() != dim_out) {
947 throw nn_error(format_str(
948 "output dimension mismatch!\n dim(target[%u])=%u, "
949 "dim(network output size=%u", i, t.size(), dim_out));
// Validates a training set: input and target counts must match, every
// input must have the network's input dimension, and each target is
// checked via the appropriate check_t overload (T is label_t or
// vec_t). NOTE(review): dim_in/dim_out are initialized on elided
// lines.
953 template <
typename T>
954 void check_training_data(
const std::vector<vec_t>& in,
955 const std::vector<T>& t) {
959 if (in.size() != t.size()) {
960 throw nn_error(
"size of training data must be equal to label data");
963 size_t num = in.size();
965 for (
size_t i = 0; i < num; i++) {
966 if (in[i].size() != dim_in) {
967 throw nn_error(format_str(
968 "input dimension mismatch!\n dim(data[%u])=%d, "
969 "dim(network input)=%u", i, in[i].size(), dim_in));
971 check_t(i, t[i], dim_out);
// Validates an optional per-sample cost matrix: when supplied it must
// have one entry per target sample, and each entry must match the
// corresponding target's shape (checked element-wise below).
975 void check_target_cost_matrix(
const std::vector<tensor_t>& t,
976 const std::vector<tensor_t>& t_cost) {
// An empty cost matrix means "no weighting" and is always valid.
977 if (!t_cost.empty()) {
978 if (t.size() != t_cost.size()) {
979 throw nn_error(
"if target cost is supplied, "
980 "its length must equal that of target data");
983 for (
size_t i = 0, end = t.size(); i < end; i++) {
984 check_target_cost_element(t[i], t_cost[i]);
// Shape check for one target/cost vector pair: sizes must be equal.
990 void check_target_cost_element(
const vec_t& t,
const vec_t& t_cost) {
991 if (t.size() != t_cost.size()) {
992 throw nn_error(
"if target cost is supplied for a regression task, "
993 "its shape must be identical to the target data");
// Shape check for one target/cost tensor pair: channel counts must be
// equal, then each channel is checked with the vec_t overload.
996 void check_target_cost_element(
const tensor_t& t,
const tensor_t& t_cost) {
997 if (t.size() != t_cost.size()) {
998 throw nn_error(
"if target cost is supplied for a regression task, "
999 "its shape must be identical to the target data");
1001 for (
size_t i = 0; i < t.size(); i++)
1002 check_target_cost_element(t[i], t_cost[i]);
// Returns a pointer to the i-th per-sample cost tensor, or (per the
// non-empty guard) presumably nullptr when no cost matrix was
// supplied — the fall-through return is on an elided line; confirm.
1005 const tensor_t* get_target_cost_sample_pointer(
1006 const std::vector<tensor_t>& t_cost,
size_t i) {
1007 if (!t_cost.empty()) {
1008 assert(i < t_cost.size());
1009 return &(t_cost[i]);
// Identity normalization: tensor input is already in canonical form,
// so it is copied through unchanged.
1015 void normalize_tensor(
const std::vector<tensor_t>& inputs,
1016 std::vector<tensor_t>& normalized) {
1017 normalized = inputs;
// Normalizes a batch of plain vectors by wrapping each one in a
// single-channel tensor_t.
1020 void normalize_tensor(
const std::vector<vec_t>& inputs,
1021 std::vector<tensor_t>& normalized) {
1022 normalized.reserve(inputs.size());
1023 for (
size_t i = 0; i < inputs.size(); i++)
1024 normalized.emplace_back(tensor_t{ inputs[i] });
// Normalizes class labels: converts each label to a one-of-K output
// vector via net_.label2vec, then wraps the vectors as tensors using
// the vec_t overload.
1027 void normalize_tensor(
const std::vector<label_t>& inputs,
1028 std::vector<tensor_t>& normalized) {
1029 std::vector<vec_t> vec;
1030 normalized.reserve(inputs.size());
1031 net_.label2vec(&inputs[0],
static_cast<serial_size_t
>(inputs.size()), &vec);
1032 normalize_tensor(vec, normalized);
// Slices a rows x cols image (row-major in `data`) into square
// patches of side `sizepatch`, sampled on a grid with stride `step`,
// each flattened into a vec_t. NOTE(review): the closing of the
// lambda/for_i and the return statement are on elided lines.
1049inline std::vector<vec_t> image2vec(
const float_t* data,
1050 const unsigned int rows,
1051 const unsigned int cols,
1052 const unsigned int sizepatch,
1053 const unsigned int step = 1) {
1055 std::vector<vec_t> res((cols-sizepatch) * (rows-sizepatch) / (step*step),
1056 vec_t(sizepatch*sizepatch));
// Parallel loop over patch indices; count -> (i, j) top-left corner.
1057 for_i((cols-sizepatch)*(rows-sizepatch)/(step*step), [&](
int count) {
1058 const int j = step*(count / ((cols-sizepatch)/step));
1059 const int i = step*(count % ((cols-sizepatch)/step));
// Skip patches that would read past the image border.
1063 if (i+sizepatch < cols && j+sizepatch < rows) {
1064 for (
unsigned int k = 0; k < sizepatch*sizepatch; k++) {
// k -> (x, y) pixel inside the patch, offset by the corner.
1066 unsigned int y = k / sizepatch + j;
1067 unsigned int x = k % sizepatch + i;
1068 res[count][k] = data[x+y*cols];
// Appends a layer to a sequential network (builder-style syntax:
// net << layer1 << layer2). Perfect-forwards the layer into net_.
// NOTE(review): the `return n;` line is outside this view.
1077template <
typename Layer>
1078network<sequential>& operator << (network<sequential>& n, Layer&& l) {
1079 n.net_.add(std::forward<Layer>(l));
// Stream-output operator for a network. NOTE(review): the `os`
// parameter line and the function body are on elided lines — can't
// tell from here what representation is written.
1083template <
typename NetType,
typename Char,
typename CharTraits>
1084std::basic_ostream<Char, CharTraits>& operator << (std::basic_ostream<Char,
1086 const network<NetType>& n) {
// Stream-input operator for a network. NOTE(review): the `is`
// parameter line and the function body are on elided lines — can't
// tell from here what format is read.
1091template <
typename NetType,
typename Char,
typename CharTraits>
1092std::basic_istream<Char, CharTraits>& operator >> (std::basic_istream<Char,
1094 network<NetType>& n) {
// Builds a graph network from raw, non-owning layer pointers by
// delegating to the internal graph container's construct().
1099inline void construct_graph(network<graph>& graph,
1100 const std::vector<layer*>& inputs,
1101 const std::vector<layer*>& outputs) {
1102 graph.net_.construct(inputs, outputs);
// Builds a graph network from shared_ptr layers: extracts the raw
// (non-owning) pointers and forwards to net_.construct(). Ownership
// stays with the caller's shared_ptrs.
1105inline void construct_graph(network<graph>& graph,
1106 const std::vector<std::shared_ptr<layer>>& inputs,
1107 const std::vector<std::shared_ptr<layer>>& outputs) {
1108 std::vector<layer*> in_ptr, out_ptr;
// shared_ptr -> raw pointer adaptor for std::transform below.
1109 auto shared2ptr = [](std::shared_ptr<layer> l) {
return l.get(); };
1111 std::transform(inputs.begin(), inputs.end(),
1112 std::back_inserter(in_ptr), shared2ptr);
1113 std::transform(outputs.begin(), outputs.end(),
1114 std::back_inserter(out_ptr), shared2ptr);
1116 graph.net_.construct(in_ptr, out_ptr);