#include "tiny_dnn/core/framework/device.fwd.h"

#if defined(USE_OPENCL) || defined(USE_CUDA)
#include "third_party/CLCudaAPI/clpp11.h"
#include "third_party/CLCudaAPI/cupp11.h"
#endif
template <typename U = float_t>
class Tensor {
 public:
  // Builds a 4-D tensor from a shape given as a std::array.
  explicit Tensor(const std::array<size_t, 4> &shape) {
    reshape(shape[0], shape[1], shape[2], shape[3]);
  }

  // Builds a 4-D tensor from a shape given as a std::vector (first four entries).
  explicit Tensor(const std::vector<size_t> &shape) {
    reshape(shape[0], shape[1], shape[2], shape[3]);
  }
  // Copy construction/assignment duplicates the host buffer and marks the
  // host copy as the current one.
  Tensor(const Tensor<U> &other) {
    shape_ = other.shape_;
    host_data_ = other.host_data_;
    data_is_on_host_ = true;
  }

  Tensor<U> &operator=(const Tensor<U> &other) {
    shape_ = other.shape_;
    data_is_on_host_ = true;
    host_data_ = other.host_data_;
    return *this;
  }
#ifdef CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
  Tensor(Tensor<U> &&other) = default;                // move constructor
  Tensor<U> &operator=(Tensor<U> &&other) = default;  // move assignment
#else
  // Manual move operations for compilers without defaulted moves.
  Tensor(Tensor<U> &&other) {
    shape_ = std::move(other.shape_);
    host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
    device_data_ = std::move(other.device_data_);
#endif
    data_is_on_host_ = other.data_is_on_host_;
    data_dirty_ = other.data_dirty_;
  }

  Tensor<U> &operator=(Tensor<U> &&other) {
    shape_ = std::move(other.shape_);
    host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
    device_data_ = std::move(other.device_data_);
#endif
    data_is_on_host_ = other.data_is_on_host_;
    data_dirty_ = other.data_dirty_;
    return *this;
  }
#endif  // CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
  const std::array<size_t, 4> &shape() const { return shape_; }
  // Checked element access (mutable); throws nn_error for out-of-range indices.
  U &host_at(const size_t d0, const size_t d1,
             const size_t d2, const size_t d3) {
    return *host_ptr(d0, d1, d2, d3);
  }

  // Checked element access (read-only).
  U host_at(const size_t d0, const size_t d1,
            const size_t d2, const size_t d3) const {
    return *host_ptr(d0, d1, d2, d3);
  }
  // Checked pointer access (read-only); throws nn_error for out-of-range indices.
  const U *host_ptr(const size_t d0, const size_t d1,
                    const size_t d2, const size_t d3) const {
    if (d0 >= shape_[0] || d1 >= shape_[1] ||
        d2 >= shape_[2] || d3 >= shape_[3]) {
      throw nn_error("Access tensor out of range.");
    }

    return host_data() + (shape_[1] * shape_[2] * shape_[3] * d0 +
                          shape_[1] * shape_[2] * d3 +
                          shape_[1] * d2 + d1);
  }
  // Checked pointer access (mutable).
  U *host_ptr(const size_t d0, const size_t d1,
              const size_t d2, const size_t d3) {
    if (d0 >= shape_[0] || d1 >= shape_[1] ||
        d2 >= shape_[2] || d3 >= shape_[3]) {
      throw nn_error("Access tensor out of range.");
    }

    return mutable_host_data() + (shape_[1] * shape_[2] * shape_[3] * d0 +
                                  shape_[1] * shape_[2] * d3 +
                                  shape_[1] * d2 + d1);
  }
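  // The offset computed by both host_ptr() overloads is
  //   shape_[1] * shape_[2] * shape_[3] * d0
  // + shape_[1] * shape_[2]             * d3
  // + shape_[1]                         * d2
  // +                                     d1
  // (the last two terms are reconstructed from the stride pattern of the
  // first two). Worked example for shape_ = {2, 3, 4, 5}:
  // (d0, d1, d2, d3) = (1, 2, 3, 4) maps to 60*1 + 12*4 + 3*3 + 2 = 119,
  // the last element of the 120-element buffer.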
  const U *host_data() const {
    fromDevice();  // make sure the host copy is up to date
    return host_data_.data();
  }

  U *mutable_host_data() {
    fromDevice();
    data_dirty_ = true;  // the caller may modify the host copy
    return host_data_.data();
  }

#if defined(USE_OPENCL) || defined(USE_CUDA)
  // ... device-side buffer accessors ...
#endif
  // Number of elements in the host buffer.
  size_t size() const { return host_data_.size(); }

  // Sets every element to the given value.
  void fill(U value) {
    data_is_on_host_ = true;
    data_dirty_ = true;
    std::fill(std::begin(host_data_), std::end(host_data_), value);
  }
  // Resizes the tensor; newly created elements are zero-initialized.
  void reshape(const size_t d0, const size_t d1,
               const size_t d2, const size_t d3) {
    shape_[0] = d0;
    shape_[1] = d1;
    shape_[2] = d2;
    shape_[3] = d3;
    host_data_.resize(calcSize(), U(0));
  }

  void reshape(const std::array<size_t, 4> &sz) {
    shape_ = sz;
    host_data_.resize(calcSize(), U(0));
  }
  size_t calcSize() const {
    return std::accumulate(std::begin(shape_), std::end(shape_), size_t(1),
                           std::multiplies<size_t>());
  }
  // Uploads the host buffer to the device when the host copy is newer.
  void toDevice() const {
    if (data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
      CLCudaAPI::Queue queue = device_->queue();
      CLCudaAPI::Context ctx = device_->context();
      // (Re)allocate the device buffer and copy the host data into it.
      device_data_ = std::make_shared<CLCudaAPI::Buffer<U> >(
          ctx, queue, host_data_.begin(), host_data_.end());
#endif
      data_is_on_host_ = false;
    }
  }
  // Reads the device buffer back into the host buffer when the device copy is newer.
  void fromDevice() const {
    if (!data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
      // const_cast is needed because this method is logically const.
      device_data_->Read(device_->queue(), host_data_.size(),
                         const_cast<U *>(host_data_.data()));
#endif
      data_is_on_host_ = true;
      data_dirty_ = false;
    }
  }
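  // Editorial summary of the lazy synchronization implemented above (the flag
  // checks come from toDevice()/fromDevice(); the surrounding workflow is an
  // assumption about typical use):
  //   1. a host-side write through mutable_host_data() leaves
  //      data_is_on_host_ == true and data_dirty_ == true;
  //   2. toDevice() then uploads the buffer and clears data_is_on_host_;
  //   3. a kernel updates the device copy and marks it dirty;
  //   4. fromDevice() reads the buffer back before the next host access.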
 private:
  std::array<size_t, 4> shape_;                           // tensor dimensions
  std::vector<U, aligned_allocator<U, 64> > host_data_;   // aligned host storage

#if defined(USE_OPENCL) || defined(USE_CUDA)
  // Device-side state (declarations reconstructed; exact types may differ).
  Device *device_ = nullptr;
  mutable std::shared_ptr<CLCudaAPI::Buffer<U> > device_data_;
#endif

  mutable bool data_is_on_host_;
  mutable bool data_dirty_;
};
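// Usage sketch (illustrative, not part of the original header). The function
// name and the concrete dimensions are arbitrary examples; fill() and the
// checked accessors are used exactly as declared above.
inline void tensor_usage_example() {
  Tensor<float_t> t(std::array<size_t, 4>{{2, 3, 4, 5}});  // zero-initialized
  t.fill(float_t(1));                    // every element becomes 1
  t.host_at(0, 1, 2, 3) = float_t(42);   // bounds-checked write
  float_t v = t.host_at(0, 1, 2, 3);     // bounds-checked read, v == 42
  t.reshape(1, 6, 4, 5);                 // same element count, new layout
  (void)v;
}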
// Prints a tensor batch by batch and channel by channel.
template <typename T>
inline std::ostream &operator<<(std::ostream &os, const Tensor<T> &tensor) {
  const std::array<size_t, 4> &shape = tensor.shape();
  for (size_t i = 0; i < shape[0]; ++i) {
    os << "-- Batch: " << i << "\n";
    for (size_t j = 0; j < shape[3]; ++j) {
      os << "-- Channel: " << j << "\n";
      for (size_t k = 0; k < shape[1]; ++k) {
        for (size_t l = 0; l < shape[2]; ++l) {
          os << tensor.host_at(i, k, l, j) << " ";
        }
        os << "\n";
      }
    }
  }
  os << "----------------\n"
     << "--> Tensor size: [ "
     << shape[0] << " x " << shape[1] << " x "
     << shape[2] << " x " << shape[3] << " ]\n";
  return os;
}
// Applies the binary function f element-wise to two tensors of the same shape.
template <typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_tensor_elementwise_operation(Tensor<TD> &dst,
                                                const Tensor<TS1> &src1,
                                                const Tensor<TS2> &src2,
                                                F f) {
  if (src1.shape() != src2.shape()) {
    throw nn_error("Tensor must have same shape");
  }

  dst.reshape(src1.shape());

  TD *pdst = dst.mutable_host_data();
  const TS1 *psrc1 = src1.host_data();
  const TS2 *psrc2 = src2.host_data();

  for_i(true, dst.size(), [pdst, psrc1, psrc2, &f](size_t i) {
    pdst[i] = f(psrc1[i], psrc2[i]);
  });
}
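// Illustrative example (not from the original header): the helper above also
// accepts ad-hoc lambdas, not only the functors defined in the details
// namespace below. The function name is hypothetical.
template <typename T>
void example_elementwise_max(Tensor<T> &dst, const Tensor<T> &a,
                             const Tensor<T> &b) {
  binary_tensor_tensor_elementwise_operation(
      dst, a, b, [](T x, T y) { return x > y ? x : y; });
}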
// Applies the unary function f element-wise.
template <typename TD, typename TS, typename F>
void unary_tensor_elementwise_operation(Tensor<TD> &dst, const Tensor<TS> &src,
                                        F f) {
  dst.reshape(src.shape());

  TD *pdst = dst.mutable_host_data();
  const TS *psrc = src.host_data();

  for_i(true, dst.size(), [pdst, psrc, &f](size_t i) {
    pdst[i] = f(psrc[i]);
  });
}
// Applies f(tensor element, scalar) element-wise.
template <typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_scalar_operation(Tensor<TD> &dst, const Tensor<TS1> &src1,
                                    TS2 src2, F f) {
  dst.reshape(src1.shape());

  TD *pdst = dst.mutable_host_data();
  const TS1 *psrc1 = src1.host_data();

  for_i(true, dst.size(), [pdst, psrc1, src2, &f](size_t i) {
    pdst[i] = f(psrc1[i], src2);
  });
}
// Applies f(scalar, tensor element) element-wise.
template <typename TD, typename TS1, typename TS2, typename F>
void binary_scalar_tensor_operation(Tensor<TD> &dst, TS1 src1,
                                    const Tensor<TS2> &src2, F f) {
  dst.reshape(src2.shape());

  TD *pdst = dst.mutable_host_data();
  const TS2 *psrc2 = src2.host_data();

  for_i(true, dst.size(), [pdst, src1, psrc2, &f](size_t i) {
    pdst[i] = f(src1, psrc2[i]);
  });
}
namespace details {

  template <typename TS1, typename TS2>
  auto plus(TS1 s1, TS2 s2) -> decltype(s1 + s2) { return s1 + s2; }

  template <typename TS1, typename TS2>
  auto minus(TS1 s1, TS2 s2) -> decltype(s1 - s2) { return s1 - s2; }

  template <typename TS1, typename TS2>
  auto multiplies(TS1 s1, TS2 s2) -> decltype(s1 * s2) { return s1 * s2; }

  // Division that yields a quiet NaN when the divisor is zero.
  template <typename TS1, typename TS2>
  auto divides_checked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
    typedef decltype(s1 / s2) result_type;
    return (s2 == result_type{}) ? std::numeric_limits<result_type>::quiet_NaN()
                                 : s1 / s2;
  }

  // Division without the zero check; the caller guarantees s2 != 0.
  template <typename TS1, typename TS2>
  auto divides_unchecked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
    return s1 / s2;
  }

  // Square root that yields a quiet NaN for non-positive inputs.
  template <typename T>
  T sqrt_checked(T s1) {
    return (s1 <= T{}) ? std::numeric_limits<T>::quiet_NaN() : std::sqrt(s1);
  }

  template <typename T>
  T exp(T s1) {
    return std::exp(s1);
  }

}  // namespace details
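// Note on the checked variants above: for floating-point operands,
// details::divides_checked(1.0f, 0.0f) evaluates to a quiet NaN rather than
// +inf, and details::sqrt_checked(-1.0f) yields a quiet NaN instead of
// invoking sqrt on a negative number.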
// Element-wise addition: dst = src1 + src2 (any mix of scalar and tensor operands).
template <typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD> &dst, TS1 src1, const Tensor<TS2> &src2) {
  binary_scalar_tensor_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD> &dst, const Tensor<TS1> &src1, TS2 src2) {
  binary_tensor_scalar_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD> &dst, const Tensor<TS1> &src1,
               const Tensor<TS2> &src2) {
  binary_tensor_tensor_elementwise_operation(dst, src1, src2,
                                             details::plus<TS1, TS2>);
}
template <typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD> &dst, TS1 src1, const Tensor<TS2> &src2) {
  binary_scalar_tensor_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD> &dst, const Tensor<TS1> &src1, TS2 src2) {
  binary_tensor_scalar_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD> &dst, const Tensor<TS1> &src1,
               const Tensor<TS2> &src2) {
  binary_tensor_tensor_elementwise_operation(dst, src1, src2,
                                             details::minus<TS1, TS2>);
}
template <typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD> &dst, TS1 src1, const Tensor<TS2> &src2) {
  binary_scalar_tensor_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD> &dst, const Tensor<TS1> &src1, TS2 src2) {
  binary_tensor_scalar_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD> &dst, const Tensor<TS1> &src1,
               const Tensor<TS2> &src2) {
  binary_tensor_tensor_elementwise_operation(dst, src1, src2,
                                             details::multiplies<TS1, TS2>);
}
template <typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD> &dst, TS1 src1, const Tensor<TS2> &src2) {
  binary_scalar_tensor_operation(dst, src1, src2,
                                 details::divides_checked<TS1, TS2>);
}

template <typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD> &dst, const Tensor<TS1> &src1, TS2 src2) {
  if (src2 == TS2(0.0)) {
    // Dividing by a zero scalar: the whole result is NaN.
    dst.reshape(src1.shape());
    dst.fill(std::numeric_limits<TD>::quiet_NaN());
  } else {
    // The scalar is known to be non-zero, so the unchecked division is safe.
    binary_tensor_scalar_operation(dst, src1, src2,
                                   details::divides_unchecked<TS1, TS2>);
  }
}

template <typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD> &dst, const Tensor<TS1> &src1,
               const Tensor<TS2> &src2) {
  binary_tensor_tensor_elementwise_operation(dst, src1, src2,
                                             details::divides_checked<TS1, TS2>);
}
template <typename TD, typename TS>
void layer_sqrt(Tensor<TD> &dst, const Tensor<TS> &src1) {
  return unary_tensor_elementwise_operation(dst, src1,
                                            details::sqrt_checked<TS>);
}

template <typename TD, typename TS>
void layer_exp(Tensor<TD> &dst, const Tensor<TS> &src1) {
  return unary_tensor_elementwise_operation(dst, src1, details::exp<TS>);
}
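// Usage sketch (illustrative, not part of the original header): element-wise
// arithmetic through the layer_* wrappers above. The function name and the
// dimensions are arbitrary examples.
inline void tensor_arithmetic_example() {
  Tensor<float_t> a(std::array<size_t, 4>{{1, 2, 2, 1}});
  Tensor<float_t> b(std::array<size_t, 4>{{1, 2, 2, 1}});
  Tensor<float_t> out(std::array<size_t, 4>{{1, 2, 2, 1}});
  a.fill(float_t(4));
  b.fill(float_t(2));

  layer_add(out, a, b);             // every element becomes 6
  layer_mul(out, a, float_t(0.5));  // every element becomes 2
  layer_div(out, a, b);             // every element becomes 2
  layer_div(out, a, float_t(0));    // every element becomes a quiet NaN
  layer_sqrt(out, a);               // every element becomes 2
}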