  // convolution
  // ...
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               // ...
    , conv_layer_worker_storage_(ptr)
    , copy_and_pad_input(f1)
    , copy_and_unpad_delta(f2)
    , backward_activation(f3) {}
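  // The conv constructor keeps a raw pointer to per-layer worker storage
  // (conv_layer_worker_storage_); the padded input produced by
  // copy_and_pad_input is presumably cached there as prev_out_padded_, which
  // the quantized forward and backward paths below read back.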
  // deconvolution
  // ...
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               // ...
    , deconv_layer_worker_storage_(ptr)
    , copy_and_unpad_output(f1)
    , copy_and_pad_delta(f2)
    , backward_activation(f3) {}
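  // Note the mirrored wiring of the helpers: the convolution variant pads its
  // input and un-pads the propagated delta, while the deconvolution variant
  // un-pads its (larger) output and pads the incoming delta instead.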
  // max pooling
  tiny_backend(std::vector<std::vector<serial_size_t>>* out2in,
               std::vector<serial_size_t>* in2out,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f,
               // ...
    : max_pooling_layer_worker_storage_(ptr)
    // ...
    , backward_activation(f) {}
  // fully connected
  // ...
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f)
    // ...
    , backward_activation(f) {}
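  // Minimal usage sketch (hypothetical, not part of this header): the owning
  // layer picks the constructor matching its kind and hands over its parameter
  // struct plus the callbacks the backend needs. The params_ member and the
  // surrounding layer are assumptions here.
  //
  //   auto backend = std::make_shared<tiny_backend>(
  //       &params_,  // hypothetical fully-connected parameter struct
  //       [this](const tensor_t& prev_out, const tensor_t& curr_out,
  //              tensor_t& curr_delta) {
  //         // apply this layer's activation gradient to curr_delta here
  //       });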
  // forward convolution
  void conv2d(const std::vector<tensor_t*>& in_data,
              std::vector<tensor_t*>& out_data) override {
    // ...
  }
  // quantized forward convolution
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    // ...
    const vec_t& bias = (*in_data[2])[0];
    // ...
    const std::vector<const vec_t*>& in =
        (*conv_layer_worker_storage_).prev_out_padded_;
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
        *in[i], W, bias, a[i], layer_->parallelize());
    }
  }
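  // Data layout note: in_data[0] carries the layer input (padded through
  // copy_and_pad_input) and (*in_data[2])[0] the bias vector; W and a are the
  // weight vector and output accumulator, which in the usual tiny-dnn calling
  // convention come from in_data[1] and out_data respectively.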
  // quantized forward convolution (eq variant)
  void conv2d_eq(const std::vector<tensor_t*>& in_data,
                 std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    // ...
    const vec_t& bias = (*in_data[2])[0];
    // ...
    const std::vector<const vec_t*>& in =
        (*conv_layer_worker_storage_).prev_out_padded_;
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_, /* ... */);
    }
  }
  // backward convolution
  void conv2d(const std::vector<tensor_t*>& in_data,
              const std::vector<tensor_t*>& out_data,
              std::vector<tensor_t*>& out_grad,
              std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  // backward quantized convolution
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    auto& cws = *conv_layer_worker_storage_;
    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    // ...
    tensor_t* prev_delta =
        (params_c_->pad_type == padding::same) ? /* ... */ : /* ... */;
    // ...
    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    // ...
    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_conv2d_back_kernel(*params_c_, /* ... */);
    }

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }
  }
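  // When the layer uses padding::same, the backward pass works on the padded
  // scratch buffer (cws.prev_delta_padded_) for the input gradient, and
  // copy_and_unpad_delta then crops that buffer back into in_grad[0]; with
  // padding::valid the gradient goes to in_grad[0] without the extra copy.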
  // forward deconvolution
  void deconv2d(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    // ...
    const vec_t& bias = (*in_data[2])[0];
    // ...
    const tensor_t& in = *in_data[0];
    // ...
    fill_tensor(a, float_t(0), params_d_->out.size());
    // ...
    kernels::tiny_deconv2d_kernel(*params_d_,
      in, W, bias, a, layer_->parallelize());

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
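  // The deconvolution kernel writes a padded result into a; copy_and_unpad_output
  // crops it into the worker storage's curr_out_unpadded_, and the final
  // assignment copies that cropped tensor back into a (presumably the layer's
  // output slot) so callers see the unpadded output.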
  // quantized forward deconvolution
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in = *in_data[0];
    // ...
    const vec_t& bias = (*in_data[2])[0];
    // ...
    fill_tensor(a, float_t(0), params_d_->out.size());
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
        in[i], W, bias, a[i], layer_->parallelize());
    }
    // ...
    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
  // quantized forward deconvolution (eq variant)
  void deconv2d_eq(const std::vector<tensor_t*>& in_data,
                   std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in = *in_data[0];
    // ...
    const vec_t& bias = (*in_data[2])[0];
    // ...
    fill_tensor(a, float_t(0), params_d_->out.size());
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_, /* ... */);
    }
    // ...
    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
  // backward deconvolution
  void deconv2d(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    auto& cws = *deconv_layer_worker_storage_;
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);
    // ...
    tensor_t& curr_delta = (params_d_->pad_type == padding::same)
                               ? cws.curr_delta_padded : *out_grad[1];
    // ...
    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    // ...
    kernels::tiny_deconv2d_back_kernel(*params_d_, /* ... */);
    // ...
  }
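  // With padding::same the backward pass operates on the padded delta buffer
  // cws.curr_delta_padded (selected through the curr_delta reference above);
  // otherwise the unpadded gradient in out_grad[1] is used as-is.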
  // backward quantized deconvolution
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  const std::vector<tensor_t*>& out_data,
                  std::vector<tensor_t*>& out_grad,
                  std::vector<tensor_t*>& in_grad) override {
    auto& cws = *deconv_layer_worker_storage_;
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);
    // ...
    tensor_t& curr_delta = (params_d_->pad_type == padding::same)
                               ? cws.curr_delta_padded : *out_grad[1];
    // ...
    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    // ...
    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_deconv2d_back_kernel(*params_d_, /* ... */);
    }
  }
  // forward max pooling
  void maxpool(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
    // ...
    if (max_pooling_layer_worker_storage_) {}  // no-op check, presumably to silence an unused warning
    // ...
  }
  // backward max pooling
  void maxpool(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  // forward fully-connected
  void fully(const std::vector<tensor_t*>& in_data,
             std::vector<tensor_t*>& out_data) override {
    // ...
  }
  // quantized forward fully-connected
  void fully_q(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
        in[i], W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
        a[i], layer_->parallelize());
    }
    // ...
#endif
  }
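  // The quantized fully-connected paths are guarded by CNN_USE_GEMMLOWP; the
  // kernels they call are presumably backed by the gemmlowp library when that
  // flag is enabled.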
  // quantized forward fully-connected (eq variant)
  void fully_eq(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    // ...
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_, /* ... */);
    }
    // ...
#endif
  }
  // backward fully-connected
  void fully(const std::vector<tensor_t*>& in_data,
             const std::vector<tensor_t*>& out_data,
             std::vector<tensor_t*>& out_grad,
             std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  // backward quantized fully-connected
  void fully_q(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
#ifdef CNN_USE_GEMMLOWP
    // ...
    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_fully_connected_back_kernel(*params_f_,
        prev_out[i], /* ... */);
    }
    // ...
#endif
  }
  backend_t type() const override { return default_engine(); }
  // index maps passed in by the max-pooling constructor
  std::vector<std::vector<serial_size_t>>* out2in_;
  std::vector<serial_size_t>* in2out_;
  // helper callbacks supplied through the constructors
  std::function<void(const tensor_t&)> copy_and_pad_input;
  std::function<void(const tensor_t&)> copy_and_unpad_output;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_unpad_delta;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_pad_delta;
  std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> backward_activation;
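  // backward_activation is the one callback every constructor sets; it is
  // presumably how the owning layer injects its activation-gradient step into
  // the backward kernels above.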