28#include "tiny_dnn/util/util.h"
29#include "tiny_dnn/util/math_functions.h"
30#include "tiny_dnn/layers/layer.h"
53 float_t epsilon = 1
e-5,
55 net_phase
phase = net_phase::train)
56 :
Base({ vector_type::data }, { vector_type::data }),
57 in_channels_(prev_layer.out_shape()[0].depth_),
58 in_spatial_size_(prev_layer.out_shape()[0].area()),
62 update_immidiately_(
false)
76 float_t epsilon = 1
e-5,
78 net_phase
phase = net_phase::train)
79 :
Base({ vector_type::data }, { vector_type::data }),
81 in_spatial_size_(in_spatial_size),
85 update_immidiately_(
false)
102 std::vector<index3d<serial_size_t>>
in_shape()
const override {
106 std::vector<index3d<serial_size_t>>
out_shape()
const override {
111 const std::vector<tensor_t*>&
out_data,
113 std::vector<tensor_t*>&
in_grad)
override {
119 CNN_UNREFERENCED_PARAMETER(
in_data);
125 for (serial_size_t
j = 0;
j <
curr_out[0].size();
j++) {
139 for (serial_size_t
j = 0;
j < in_channels_;
j++) {
140 for (serial_size_t
k = 0;
k < in_spatial_size_;
k++) {
141 serial_size_t index =
j*in_spatial_size_ +
k;
154 std::vector<tensor_t*>&
out_data)
override {
155 vec_t*
mean =
nullptr;
160 if (phase_ == net_phase::train) {
162 mean = &mean_current_;
176 const float_t*
inptr = &in[
i][0];
179 for (serial_size_t
j = 0;
j < in_channels_;
j++) {
180 float_t
m = (*mean)[
j];
182 for (serial_size_t
k = 0;
k < in_spatial_size_;
k++) {
188 if (phase_ == net_phase::train && update_immidiately_) {
189 mean_ = mean_current_;
190 variance_ = variance_current_;
199 std::string
layer_type()
const override {
return "batch-norm"; }
202 for (serial_size_t
i = 0;
i < mean_.size();
i++) {
203 mean_[
i] = momentum_ * mean_[
i] + (1 - momentum_) * mean_current_[
i];
204 variance_[
i] = momentum_ * variance_[
i] + (1 - momentum_) * variance_current_[
i];
208 virtual void save(std::ostream&
os)
const override {
210 for (
auto m : mean_)
os <<
m <<
" ";
211 for (
auto v : variance_)
os <<
v <<
" ";
214 virtual void load(std::istream& is)
override {
216 for (
auto& m : mean_) is >> m;
217 for (
auto& v : variance_) is >> v;
220 virtual void load(
const std::vector<float_t>& src,
int& idx)
override {
221 Base::load(src, idx);
222 for (
auto& m : mean_) m = src[idx++];
223 for (
auto& v : variance_) v = src[idx++];
226 void update_immidiately(
bool update) {
227 update_immidiately_ = update;
230 void set_stddev(
const vec_t& stddev) {
234 void set_mean(
const vec_t& mean) {
238 void set_variance(
const vec_t& variance) {
239 variance_ = variance;
240 calc_stddev(variance);
243 template <
class Archive>
244 static void load_and_construct(Archive & ar, cereal::construct<batch_normalization_layer> & construct) {
247 float_t eps, momentum;
249 vec_t mean, variance;
251 ar(cereal::make_nvp(
"in_spatial_size", in_spatial_size),
253 cereal::make_nvp(
"epsilon", eps),
254 cereal::make_nvp(
"momentum", momentum),
255 cereal::make_nvp(
"phase", phase),
256 cereal::make_nvp(
"mean", mean),
257 cereal::make_nvp(
"variance", variance));
258 construct(in_spatial_size,
in_channels, eps, momentum, phase);
259 construct->set_mean(mean);
260 construct->set_variance(variance);
263 template <
class Archive>
264 void serialize(Archive & ar) {
265 layer::serialize_prolog(ar);
266 ar(cereal::make_nvp(
"in_spatial_size", in_spatial_size_),
267 cereal::make_nvp(
"in_channels", in_channels_),
268 cereal::make_nvp(
"epsilon", eps_),
269 cereal::make_nvp(
"momentum", momentum_),
270 cereal::make_nvp(
"phase", phase_),
271 cereal::make_nvp(
"mean", mean_),
272 cereal::make_nvp(
"variance", variance_));
275 float_t epsilon()
const {
279 float_t momentum()
const {
284 void calc_stddev(
const vec_t& variance) {
285 for (
size_t i = 0; i < in_channels_; i++) {
286 stddev_[i] = sqrt(variance[i] + eps_);
291 mean_current_.resize(in_channels_);
292 mean_.resize(in_channels_);
293 variance_current_.resize(in_channels_);
294 variance_.resize(in_channels_);
295 tmp_mean_.resize(in_channels_);
296 stddev_.resize(in_channels_);
299 serial_size_t in_channels_;
300 serial_size_t in_spatial_size_;
308 vec_t variance_current_;
318 bool update_immidiately_;
Batch Normalization.
Definition batch_normalization_layer.h:42
std::vector< index3d< serial_size_t > > out_shape() const override
array of output shapes (width x height x depth)
Definition batch_normalization_layer.h:106
void set_context(net_phase ctx) override
notify changing context (train <=> test)
Definition batch_normalization_layer.h:194
void back_propagation(const std::vector< tensor_t * > &in_data, const std::vector< tensor_t * > &out_data, std::vector< tensor_t * > &out_grad, std::vector< tensor_t * > &in_grad) override
return delta of previous layer (delta=\frac{dE}{da}, a=wx in fully-connected layer)
Definition batch_normalization_layer.h:110
virtual void post_update() override
return delta2 of previous layer (delta2=\frac{d^2E}{da^2}, diagonal of hessian matrix) it is never ca...
Definition batch_normalization_layer.h:201
void forward_propagation(const std::vector< tensor_t * > &in_data, std::vector< tensor_t * > &out_data) override
Definition batch_normalization_layer.h:153
std::string layer_type() const override
name of layer, should be unique for each concrete class
Definition batch_normalization_layer.h:199
serial_size_t fan_in_size() const override
number of incoming connections for each output unit
Definition batch_normalization_layer.h:93
virtual ~batch_normalization_layer()
number of incoming connections for each output unit
Definition batch_normalization_layer.h:90
std::vector< index3d< serial_size_t > > in_shape() const override
array of input shapes (width x height x depth)
Definition batch_normalization_layer.h:102
batch_normalization_layer(const layer &prev_layer, float_t epsilon=1e-5, float_t momentum=0.999, net_phase phase=net_phase::train)
Definition batch_normalization_layer.h:52
serial_size_t fan_out_size() const override
number of outgoing connections for each input unit used only for weight/bias initialization methods w...
Definition batch_normalization_layer.h:98
batch_normalization_layer(serial_size_t in_spatial_size, serial_size_t in_channels, float_t epsilon=1e-5, float_t momentum=0.999, net_phase phase=net_phase::train)
Definition batch_normalization_layer.h:74
Simple image utility class.
Definition image.h:94
base class of all kind of NN layers
Definition layer.h:62
bool parallelize_
Flag indicating whether the layer/node operations are parallelized.
Definition layer.h:696
serial_size_t in_channels() const
number of outgoing edges in this layer
Definition layer.h:146
SGD with momentum.
Definition optimizer.h:178