tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
backend_tiny.h
/*
    Copyright (c) 2016, Taiga Nomi, Edgar Riba
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
    names of its contributors may be used to endorse or promote products
    derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include "tiny_dnn/config.h"
#include "tiny_dnn/core/backend.h"

#include "tiny_dnn/core/kernels/tiny_quantized_conv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_quantized_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_back_kernel.h"
#ifdef CNN_USE_GEMMLOWP
#include "tiny_dnn/core/kernels/tiny_quantized_fully_connected_kernel.h"
#endif
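// Note: the quantized fully-connected kernel depends on the gemmlowp library;
// when CNN_USE_GEMMLOWP is not defined, fully_q() and fully_eq() below throw
// nn_not_implemented_error.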

namespace tiny_dnn {
namespace core {

class tiny_backend : public backend {
 public:
  // context holds solution-dependent parameters
  // context should be able to hold any type of structure (like boost::any)

  // convolution
  tiny_backend(conv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               conv_layer_worker_specific_storage* ptr)
    : params_c_(params)
    , conv_layer_worker_storage_(ptr)
    , copy_and_pad_input(f1)
    , copy_and_unpad_delta(f2)
    , backward_activation(f3) {}

  // deconvolution
  tiny_backend(deconv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               deconv_layer_worker_specific_storage* ptr)
    : params_d_(params)
    , deconv_layer_worker_storage_(ptr)
    , copy_and_unpad_output(f1)
    , copy_and_pad_delta(f2)
    , backward_activation(f3) {}

  // maxpooling
  tiny_backend(std::vector<std::vector<serial_size_t>>* out2in,
               std::vector<serial_size_t>* in2out,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f,
               max_pooling_layer_worker_specific_storage* ptr)
    : max_pooling_layer_worker_storage_(ptr)
    , out2in_(out2in)
    , in2out_(in2out)
    , backward_activation(f) {}

  // fully_connected
  tiny_backend(fully_params* params,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f)
    : params_f_(params)
    , backward_activation(f) {}

  // core math functions

  void conv2d(const std::vector<tensor_t*>& in_data,
              std::vector<tensor_t*>& out_data) override {
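    // The float-precision forward pass is a no-op in this backend; the
    // original kernel invocation is kept below, commented out, for reference.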
    /*copy_and_pad_input(*in_data[0]);
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t& a = *out_data[1];
    const std::vector<const vec_t*> &in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));

    kernels::tiny_conv2d_kernel(*params_c_,
        in, W, bias, a, layer_->parallelize());*/
  }

  // quantized convolution
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t& a = *out_data[1];
    const std::vector<const vec_t*> &in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
          *in[i], W, bias, a[i], layer_->parallelize());
    }
  }

  // efficient quantization without redundant quantization/dequantization
  void conv2d_eq(const std::vector<tensor_t*>& in_data,
                 std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r = (*in_data[4])[0];
    const vec_t& b_r = (*in_data[5])[0];
    tensor_t& a = *out_data[1];
    tensor_t& a_r = *out_data[2];

    const std::vector<const vec_t*> &in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
          *in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
  }

  void conv2d(const std::vector<tensor_t*>& in_data,
              const std::vector<tensor_t*>& out_data,
              std::vector<tensor_t*>& out_grad,
              std::vector<tensor_t*>& in_grad) override {
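    // The float-precision backward pass is likewise a no-op here; the original
    // gradient computation is kept below, commented out, for reference.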
    /*conv_layer_worker_specific_storage& cws = (*conv_layer_worker_storage_);

    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = *out_grad[1];
    tensor_t* prev_delta = (params_c_->pad_type == padding::same) ?
        &cws.prev_delta_padded_ : in_grad[0];

    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    kernels::tiny_conv2d_back_kernel(*params_c_,
        prev_out, W, dW, db, curr_delta, prev_delta);

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }*/
  }

  void conv2d_q(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    conv_layer_worker_specific_storage& cws = (*conv_layer_worker_storage_);

    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = *out_grad[1];
    tensor_t* prev_delta = (params_c_->pad_type == padding::same) ?
        &cws.prev_delta_padded_ : in_grad[0];

    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_conv2d_back_kernel(*params_c_,
          *prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }
  }

  void deconv2d(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
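    // Forward deconvolution: remember the input for the backward pass, run the
    // kernel into a padded-size output buffer, then copy the unpadded result
    // back into the output tensor.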
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t& a = *out_data[1];
    const tensor_t& in = *in_data[0];  // input

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    kernels::tiny_deconv2d_kernel(*params_d_,
        in, W, bias, a, layer_->parallelize());

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  // quantized deconvolution
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in = *in_data[0];  // input
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t& a = *out_data[1];

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
          in[i], W, bias, a[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  // efficient quantization without redundant quantization/dequantization
  void deconv2d_eq(const std::vector<tensor_t*>& in_data,
                   std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in = *in_data[0];  // input
    const vec_t& W = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r = (*in_data[4])[0];
    const vec_t& b_r = (*in_data[5])[0];
    tensor_t& a = *out_data[1];
    tensor_t& a_r = *out_data[2];

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
          in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  void deconv2d(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    deconv_layer_worker_specific_storage& cws = (*deconv_layer_worker_storage_);
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = (params_d_->pad_type == padding::same) ?
        cws.curr_delta_padded : *out_grad[1];
    tensor_t* prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    kernels::tiny_deconv2d_back_kernel(*params_d_,
        prev_out, W, dW, db, curr_delta, prev_delta);
  }

  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  const std::vector<tensor_t*>& out_data,
                  std::vector<tensor_t*>& out_grad,
                  std::vector<tensor_t*>& in_grad) override {
    deconv_layer_worker_specific_storage& cws = (*deconv_layer_worker_storage_);
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = (params_d_->pad_type == padding::same) ?
        cws.curr_delta_padded : *out_grad[1];
    tensor_t* prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_deconv2d_back_kernel(*params_d_,
          prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }
  }

  void maxpool(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
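    // Max-pooling is a no-op in this backend; the original kernel call is kept
    // below, commented out, for reference.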
    // just to silence unused-member warnings; remove in the future
    if (max_pooling_layer_worker_storage_) {}
    if (out2in_) {}
    if (in2out_) {}

    /*const tensor_t& in = *in_data[0];
    tensor_t& a = *out_data[1];
    std::vector<std::vector<serial_size_t>>& max_idx =
        (*max_pooling_layer_worker_storage_).out2inmax_;

    kernels::tiny_maxpool_kernel(in, a,
        max_idx, *out2in_, layer_->parallelize());*/
  }

  void maxpool(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
    /*tensor_t& prev_delta = *in_grad[0];
    tensor_t& curr_delta = *out_grad[1];
    std::vector<std::vector<serial_size_t>>& max_idx =
        (*max_pooling_layer_worker_storage_).out2inmax_;

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    kernels::tiny_maxpool_back_kernel(prev_delta, curr_delta,
        max_idx, *in2out_, layer_->parallelize());*/
  }

  void fully(const std::vector<tensor_t*>& in_data,
             std::vector<tensor_t*>& out_data) override {
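    // Like conv2d and maxpool above, the float-precision fully-connected pass
    // is disabled in this backend; the original kernel call remains commented out.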
    /*const tensor_t& in = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    tensor_t& a = *out_data[1];

    kernels::tiny_fully_connected_kernel(*params_f_,
        in, W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
        a, layer_->parallelize());*/
  }

  void fully_q(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    tensor_t& a = *out_data[1];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
          in[i], W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
          a[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires gemmlowp library. please define CNN_USE_GEMMLOWP");
#endif
  }

  void fully_eq(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    vec_t& b = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r = (*in_data[4])[0];
    const vec_t& b_r = (*in_data[5])[0];
    tensor_t& a = *out_data[1];
    tensor_t& a_r = *out_data[2];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
          in[i], W, b, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires gemmlowp library. please define CNN_USE_GEMMLOWP");
#endif
  }

  void fully(const std::vector<tensor_t*>& in_data,
             const std::vector<tensor_t*>& out_data,
             std::vector<tensor_t*>& out_grad,
             std::vector<tensor_t*>& in_grad) override {
    /*const tensor_t& prev_out = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& prev_delta = *in_grad[0];
    tensor_t& curr_delta = *out_grad[1];

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    kernels::tiny_fully_connected_back_kernel(*params_f_, prev_out,
        W, dW, prev_delta, curr_delta, db, layer_->parallelize());*/
  }

  void fully_q(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& prev_out = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& prev_delta = *in_grad[0];
    tensor_t& curr_delta = *out_grad[1];

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_fully_connected_back_kernel(*params_f_, prev_out[i],
          W, dW[i], prev_delta[i], curr_delta[i], db[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires gemmlowp library. please define CNN_USE_GEMMLOWP");
#endif
  }

  backend_t type() const override { return default_engine(); }

 private:
  /* Pointers to the layer parameters */
  conv_params* params_c_;
  deconv_params* params_d_;
  fully_params* params_f_;

  /* Pointers to the per-layer worker storage */
  conv_layer_worker_specific_storage* conv_layer_worker_storage_;
  deconv_layer_worker_specific_storage* deconv_layer_worker_storage_;
  max_pooling_layer_worker_specific_storage* max_pooling_layer_worker_storage_;
  std::vector<std::vector<serial_size_t>>* out2in_;
  std::vector<serial_size_t>* in2out_;

  /* Callbacks provided by the owning layer */
  std::function<void(const tensor_t&)> copy_and_pad_input;
  std::function<void(const tensor_t&)> copy_and_unpad_output;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_unpad_delta;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_pad_delta;
  std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> backward_activation;
};

}  // namespace core
}  // namespace tiny_dnn
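
Usage note: layers normally construct this backend internally, but the constructors above show how it is wired up. Below is a minimal sketch for the fully-connected case, assuming a fully_params instance that the layer would fill in and a caller-supplied backward-activation callback; the lambda body is a placeholder and not part of the original source.

  using namespace tiny_dnn;

  core::fully_params params;  // assumed to be populated by the owning layer
  core::tiny_backend backend(
      &params,
      [](const tensor_t& out_grad, const tensor_t& out, tensor_t& curr_delta) {
        // placeholder: the layer would apply its activation gradient here
        curr_delta = out_grad;
      });
  // backend.type() reports the default engine; the layer then dispatches
  // fully() / fully_q() calls to it during the forward and backward passes.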