tiny_dnn 1.0.0
A header only, dependency-free deep learning framework in C++11
Loading...
Searching...
No Matches
conv2d_op_opencl.h
1/*
2 COPYRIGHT
3
4 All contributions by Taiga Nomi
5 Copyright (c) 2013, Taiga Nomi
6 All rights reserved.
7
8 All other contributions:
9 Copyright (c) 2013-2016, the respective contributors.
10 All rights reserved.
11
12 Each contributor holds copyright over their respective contributions.
13 The project versioning (Git) records all such contribution source information.
14
15 LICENSE
16
17 The BSD 3-Clause License
18
19
20 Redistribution and use in source and binary forms, with or without
21 modification, are permitted provided that the following conditions are met:
22
23 * Redistributions of source code must retain the above copyright notice, this
24 list of conditions and the following disclaimer.
25
26 * Redistributions in binary form must reproduce the above copyright notice,
27 this list of conditions and the following disclaimer in the documentation
28 and/or other materials provided with the distribution.
29
30 * Neither the name of tiny-cnn nor the names of its
31 contributors may be used to endorse or promote products derived from
32 this software without specific prior written permission.
33
34 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
35 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
38 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
40 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
41 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
42 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44*/
45#pragma once
46
47#include "tiny_dnn/core/framework/op_kernel.h"
48
49namespace tiny_dnn {
50
52 public:
55
56 void compute(const core::OpKernelContext& context) override {
57#if defined(USE_OPENCL) || defined(USE_CUDA)
58 auto params = OpKernel::params_->conv();
59
60 // incoming/outcoming data
61 const tensor_t& in_data = context.input(0);
62 const tensor_t& W = context.input(1);
63 const tensor_t& bias = context.input(2);
64 tensor_t& out_data = context.output(1);
65
66 // initialize outputs
67 fill_tensor(out_data, float_t(0));
68
69 // retrieve program from register
70 CLCudaAPI::Program program = ProgramManager::getInstance()
71 .program(Program(context.device(), context.Layer()));
72 nn_warn("Got Program");
73
74 // Creates the kernel from the compiled program and sets the three arguments.
75 // Note that the indices of the arguments have to be set according to their
76 // order in the kernel.
77 auto kernel = CLCudaAPI::Kernel(program, "CFMulti");
78 nn_warn("Got Kernel");
79
80 tiny_dnn::Device* device = context.device();
81 CLCudaAPI::Context ctx = context.device()->context();
82 CLCudaAPI::Queue queue = context.device()->queue();
83
84 // TODO(edgar): check if we really need that
85 for (serial_size_t i = 0; i < in_data.size(); ++i) {
86
87 // Creates device buffers and copies the host data to these
88 // device buffers.
89
90 auto dev_in = CLCudaAPI::Buffer<float_t>(ctx, queue,
91 in_data[i].begin(), in_data[i].end());
92
93 auto dev_W = CLCudaAPI::Buffer<float_t>(ctx, queue,
94 W[0].begin(), W[0].end());
95
96 auto dev_bias = CLCudaAPI::Buffer<float_t>(ctx, queue,
97 bias[0].begin(), bias[0].end());
98
99 auto dev_out = CLCudaAPI::Buffer<float_t>(ctx, queue,
100 out_data[i].begin(), out_data[i].end());
101
102 kernel.SetArgument(0, dev_in); // image_data
103 kernel.SetArgument(1, 0); // image_offset
104 kernel.SetArgument(2, dev_W); // kernel_data
105 kernel.SetArgument(3, 0); // kernel_offset
106 kernel.SetArgument(4, dev_bias); // bias
107 kernel.SetArgument(5, 0); // bias_offset
108 kernel.SetArgument(6, dev_out); // convolved_image
109 kernel.SetArgument(7, 0); // convolved_image_offset
110
111 kernel.SetArgument(8, static_cast<cl_ushort>(params.in.width_)); // WIDTH
112 kernel.SetArgument(9, static_cast<cl_ushort>(params.in.height_)); // HEIGHT
113 kernel.SetArgument(10, static_cast<cl_ushort>(params.out.width_)); // OUTPUT_W
114 kernel.SetArgument(11, static_cast<cl_ushort>(params.out.height_)); // OUTPUT_H
115
116 // We make sure that work group size is multiple of 16
117 serial_size_t res = device->device().MaxWorkGroupSize() % 16;
118 serial_size_t size = device->device().MaxWorkGroupSize() - res;
119
120 auto global = std::vector<size_t>{size};
121 auto local = std::vector<size_t>{16};
122
123 // Creates a new CLCudaAPI event to be able to time kernels
124 auto event = CLCudaAPI::Event();
125
126 // Enqueues the kernel and waits for the result.
127 // Note that launching the kernel is always a-synchronous and thus
128 // requires finishing the queue in order to complete the operation.
129 nn_info("## Running the kernel ...");
130
131 kernel.Launch(queue, global, local, event.pointer());
132 queue.Finish(event);
133
134 nn_info(" > Took " + to_string(event.GetElapsedTime()) + " ms");
135
136 // Upload data GPU -> CPU
137 std::vector<float_t> out(out_data[i].size(), 0);
138 dev_out.Read(queue, out_data[i].size(), out);
139
140 // FOR DEBUG ONLY
141 nn_warn("output kernel");
142 for (serial_size_t j = 0; j < out.size(); ++j) {
143 std::cout << out[j] << " ";
144 }
145 std::cout << std::endl;
146
147 // copy back
148 std::copy(std::begin(out), std::end(out), std::back_inserter(out_data[i]));
149 }
150#else
151 throw nn_error("Not compiled with OpenCL");
152#endif
153 }
154};
155
157 public:
160
161 void compute(const core::OpKernelContext& context) override {
162 nn_error("Not implemented yet.");
163 }
164};
165
166} // namespace tiny_dnn
Definition conv2d_op_opencl.h:156
Definition conv2d_op_opencl.h:51
Definition device.fwd.h:73
Definition program.h:63
Definition op_kernel.h:55
Definition op_kernel.h:72
Definition op_kernel.h:175
Simple image utility class.
Definition image.h:94
error exception class for tiny-dnn
Definition nn_error.h:37
info class for tiny-dnn (for debug)
Definition nn_error.h:69
warning class for tiny-dnn (for debug)
Definition nn_error.h:52