libpqxx 7.8.1
array.hxx
1/* Handling of SQL arrays.
2 *
3 * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4 *
5 * Copyright (c) 2000-2023, Jeroen T. Vermeulen.
6 *
7 * See COPYING for copyright license. If you did not receive a file called
8 * COPYING with this source code, please notify the distributor of this
9 * mistake, or contact the author.
10 */
11#ifndef PQXX_H_ARRAY
12#define PQXX_H_ARRAY
13
14#if !defined(PQXX_HEADER_PRE)
15# error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16#endif
17
18#include <algorithm>
19#include <cassert>
20#include <stdexcept>
21#include <string>
22#include <type_traits>
23#include <utility>
24#include <vector>
25
26#include "pqxx/connection.hxx"
27#include "pqxx/internal/array-composite.hxx"
28#include "pqxx/internal/encoding_group.hxx"
29#include "pqxx/internal/encodings.hxx"
30
31
32namespace pqxx
33{
34// TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35
37
52template<
53 typename ELEMENT, std::size_t DIMENSIONS = 1u,
54 char SEPARATOR = array_separator<ELEMENT>>
55class array final
56{
57public:
59
68 array(std::string_view data, connection const &conn) :
69 array{data, pqxx::internal::enc_group(conn.encoding_id())}
70 {}
71
73
75 constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76
78
82 std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83 {
84 return m_extents;
85 }
86
87 template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88 {
89 static_assert(sizeof...(index) == DIMENSIONS);
90 check_bounds(index...);
91 return m_elts.at(locate(index...));
92 }
93
95
103 template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104 {
105 static_assert(sizeof...(index) == DIMENSIONS);
106 return m_elts[locate(index...)];
107 }
108
110
115 constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117 constexpr auto cend() const noexcept { return m_elts.cend(); }
119 constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121 constexpr auto crend() const noexcept { return m_elts.crend(); }
122
124
127 constexpr std::size_t size() const noexcept { return m_elts.size(); }
128
130
145 constexpr auto ssize() const noexcept
146 {
147 return static_cast<std::ptrdiff_t>(size());
148 }
149
151
153 constexpr auto front() const noexcept { return m_elts.front(); }
154
156
158 constexpr auto back() const noexcept { return m_elts.back(); }
159
160private:
162
170 void check_dims(std::string_view data)
171 {
172 auto sz{std::size(data)};
173 if (sz < DIMENSIONS * 2)
174 throw conversion_error{pqxx::internal::concat(
175 "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176 "'.")};
177
178 // Making some assumptions here:
179 // * The array holds no extraneous whitespace.
180 // * None of the sub-arrays can be null.
181 // * Only ASCII characters start off with a byte in the 0-127 range.
182 //
183 // Given those, the input must start with a sequence of DIMENSIONS bytes
184 // with the ASCII value for '{'; and likewise it must end with a sequence
185 // of DIMENSIONS bytes with the ASCII value for '}'.
186
187 if (data[0] != '{')
188 throw conversion_error{"Malformed array: does not start with '{'."};
189 for (std::size_t i{0}; i < DIMENSIONS; ++i)
190 if (data[i] != '{')
191 throw conversion_error{pqxx::internal::concat(
192 "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193 if (data[DIMENSIONS] == '{')
194 throw conversion_error{pqxx::internal::concat(
195 "Tried to parse ", DIMENSIONS,
196 "-dimensional array from array data that has more dimensions.")};
197 for (std::size_t i{0}; i < DIMENSIONS; ++i)
198 if (data[sz - 1 - i] != '}')
199 throw conversion_error{
200 "Malformed array: does not end in the right number of '}'."};
201 }
202
203 explicit array(std::string_view data, pqxx::internal::encoding_group enc)
204 {
205 using group = pqxx::internal::encoding_group;
206 switch (enc)
207 {
208 case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
209 case group::BIG5: parse<group::BIG5>(data); break;
210 case group::EUC_CN: parse<group::EUC_CN>(data); break;
211 case group::EUC_JP: parse<group::EUC_JP>(data); break;
212 case group::EUC_KR: parse<group::EUC_KR>(data); break;
213 case group::EUC_TW: parse<group::EUC_TW>(data); break;
214 case group::GB18030: parse<group::GB18030>(data); break;
215 case group::GBK: parse<group::GBK>(data); break;
216 case group::JOHAB: parse<group::JOHAB>(data); break;
217 case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
218 case group::SJIS: parse<group::SJIS>(data); break;
219 case group::UHC: parse<group::UHC>(data); break;
220 case group::UTF8: parse<group::UTF8>(data); break;
221 }
222 }
223
225
228 std::size_t parse_field_end(std::string_view data, std::size_t here) const
229 {
230 auto const sz{std::size(data)};
231 if (here < sz)
232 switch (data[here])
233 {
234 case SEPARATOR:
235 ++here;
236 if (here >= sz)
237 throw conversion_error{"Array looks truncated."};
238 switch (data[here])
239 {
240 case SEPARATOR:
241 throw conversion_error{"Array contains double separator."};
242 case '}': throw conversion_error{"Array contains trailing separator."};
243 default: break;
244 }
245 break;
246 case '}': break;
247 default:
248 throw conversion_error{pqxx::internal::concat(
249 "Unexpected character in array: ",
250 static_cast<unsigned>(static_cast<unsigned char>(data[here])),
251 " where separator or closing brace expected.")};
252 }
253 return here;
254 }
255
257
262 constexpr std::size_t estimate_elements(std::string_view data) const noexcept
263 {
264 // Dirty trick: just count the number of bytes that look as if they may be
265 // separators. At the very worst we may overestimate by a factor of two or
266 // so, in exceedingly rare cases, on some encodings.
267 auto const separators{
268 std::count(std::begin(data), std::end(data), SEPARATOR)};
269 // The number of dimensions makes no difference here. It's still one
270 // separator between consecutive elements, just possibly with some extra
271 // braces as well.
272 return static_cast<std::size_t>(separators + 1);
273 }
274
275 template<pqxx::internal::encoding_group ENC>
276 void parse(std::string_view data)
277 {
278 static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
279 auto const sz{std::size(data)};
280 check_dims(data);
281
282 m_elts.reserve(estimate_elements(data));
283
284 // We discover the array's extents along each of the dimensions, starting
285 // with the final dimension and working our way towards the first. At any
286 // given point during parsing, we know the extents starting at this
287 // dimension.
288 std::size_t know_extents_from{DIMENSIONS};
289
290 // Currently parsing this dimension. We start off at -1, relying on C++'s
291 // well-defined rollover for unsigned numbers.
292 // The actual outermost dimension of the array is 0, and the innermost is
293 // at the end. But, the array as a whole is enclosed in braces just like
294 // each row. So we act like there's an anomalous "outer" dimension holding
295 // the entire array.
296 constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
297
298 // We start parsing at the fictional outer dimension. The input begins
299 // with opening braces, one for each dimension, so we'll start off by
300 // bumping all the way to the innermost dimension.
301 std::size_t dim{outer};
302
303 // Extent counters, one per "real" dimension.
304 // Note initialiser syntax; this should zero-initialise all elements.
305 std::array<std::size_t, DIMENSIONS> extents{};
306#if !defined(NDEBUG)
307 for (auto const e : extents) assert(e == 0u);
308#endif
309
310 // Current parsing position.
311 std::size_t here{0};
312 while (here < sz)
313 {
314 if (data[here] == '{')
315 {
316 if (dim == outer)
317 {
318 // This must be the initial opening brace.
319 if (know_extents_from != DIMENSIONS)
320 throw conversion_error{
321 "Array text representation closed and reopened its outside "
322 "brace pair."};
323 assert(here == 0);
324 }
325 else
326 {
327 if (dim >= (DIMENSIONS - 1))
328 throw conversion_error{
329 "Array seems to have inconsistent number of dimensions."};
330 ++extents[dim];
331 }
332 // (Rolls over to zero if we're coming from the outer dimension.)
333 ++dim;
334 extents[dim] = 0u;
335 ++here;
336 }
337 else if (data[here] == '}')
338 {
339 if (dim == outer)
340 throw conversion_error{"Array has spurious '}'."};
341 if (dim < know_extents_from)
342 {
343 // We just finished parsing our first row in this dimension.
344 // Now we know the array dimension's extent.
345 m_extents[dim] = extents[dim];
346 know_extents_from = dim;
347 }
348 else
349 {
350 if (extents[dim] != m_extents[dim])
351 throw conversion_error{"Rows in array have inconsistent sizes."};
352 }
353 // Bump back down to the next-lower dimension. Which may be the outer
354 // dimension, through underflow.
355 --dim;
356 ++here;
357 here = parse_field_end(data, here);
358 }
359 else
360 {
361 // Found an array element. The actual elements always live in the
362 // "inner" dimension.
363 if (dim != DIMENSIONS - 1)
364 throw conversion_error{
365 "Malformed array: found element where sub-array was expected."};
366 assert(dim != outer);
367 ++extents[dim];
368 std::size_t end;
369 switch (data[here])
370 {
371 case '\0': throw conversion_error{"Unexpected zero byte in array."};
372 case ',': throw conversion_error{"Array contains empty field."};
373 case '"': {
374 // Double-quoted string. We parse it into a buffer before parsing
375 // the resulting string as an element. This seems wasteful: the
376 // string might not contain any special characters. So it's
377 // tempting to check, and try to use a string_view and avoid a
378 // useless copy step. But. Even besides the branch prediction
379 // risk, the very fact that the back-end chose to quote the string
380 // indicates that there is some kind of special character in there.
381 // So in practice, this optimisation would only apply if the only
382 // special characters in the string were commas.
383 end = pqxx::internal::scan_double_quoted_string<ENC>(
384 std::data(data), std::size(data), here);
385 // TODO: scan_double_quoted_string() with reusable buffer.
386 std::string const buf{
387 pqxx::internal::parse_double_quoted_string<ENC>(
388 std::data(data), end, here)};
389 m_elts.emplace_back(from_string<ELEMENT>(buf));
390 }
391 break;
392 default: {
393 // Unquoted string. An unquoted string is always literal, no
394 // escaping or encoding, so we don't need to parse it into a
395 // buffer. We can just read it as a string_view.
396 end = pqxx::internal::scan_unquoted_string<ENC, SEPARATOR, '}'>(
397 std::data(data), std::size(data), here);
398 std::string_view const field{
399 std::string_view{std::data(data) + here, end - here}};
400 if (field == "NULL")
401 {
402 if constexpr (nullness<ELEMENT>::has_null)
403 m_elts.emplace_back(nullness<ELEMENT>::null());
404 else
405 throw unexpected_null{pqxx::internal::concat(
406 "Array contains a null ", type_name<ELEMENT>,
407 ". Consider making it an array of std::optional<",
408 type_name<ELEMENT>, "> instead.")};
409 }
410 else
411 m_elts.emplace_back(from_string<ELEMENT>(field));
412 }
413 }
414 here = end;
415 here = parse_field_end(data, here);
416 }
417 }
418
419 if (dim != outer)
420 throw conversion_error{"Malformed array; may be truncated."};
421 assert(know_extents_from == 0);
422
423 init_factors();
424 }
425
427 void init_factors() noexcept
428 {
429 std::size_t factor{1};
430 for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
431 {
432 factor *= m_extents[dim];
433 m_factors[dim - 1] = factor;
434 }
435 }
436
438 template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
439 {
440 static_assert(
441 sizeof...(index) == DIMENSIONS,
442 "Indexing array with wrong number of dimensions.");
443 return add_index(index...);
444 }
445
446 template<typename OUTER, typename... INDEX>
447 constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
448 {
449 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
450 if constexpr (sizeof...(indexes) == 0)
451 {
452 return first;
453 }
454 else
455 {
456 static_assert(sizeof...(indexes) < DIMENSIONS);
457 // (Offset by 1 here because the outer dimension is not in there.)
458 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
459 static_assert(dimension < DIMENSIONS);
460 return first * m_factors[dimension] + add_index(indexes...);
461 }
462 }
463
465
467 template<typename OUTER, typename... INDEX>
468 constexpr void check_bounds(OUTER outer, INDEX... indexes) const
469 {
470 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
471 static_assert(sizeof...(indexes) < DIMENSIONS);
472 // (Offset by 1 here because the outer dimension is not in there.)
473 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
474 static_assert(dimension < DIMENSIONS);
475 if (first >= m_extents[dimension])
476 throw range_error{pqxx::internal::concat(
477 "Array index for dimension ", dimension, " is out of bounds: ", first,
478 " >= ", m_extents[dimension])};
479
480 // Now check the rest of the indexes, if any.
481 if constexpr (sizeof...(indexes) > 0)
482 check_bounds(indexes...);
483 }
484
486 std::vector<ELEMENT> m_elts;
487
489 std::array<std::size_t, DIMENSIONS> m_extents;
490
492
499 std::array<std::size_t, DIMENSIONS - 1> m_factors;
500};
501
502
504
526class PQXX_LIBEXPORT array_parser
527{
528public:
530 enum class juncture
531 {
533 row_start,
535 row_end,
537 null_value,
539 string_value,
541 done,
542 };
543
545
549 explicit array_parser(
550 std::string_view input,
551 internal::encoding_group = internal::encoding_group::MONOBYTE);
552
554
560 std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
561
562private:
563 std::string_view m_input;
564
566 std::size_t m_pos = 0u;
567
569
574 using implementation = std::pair<juncture, std::string> (array_parser::*)();
575
577 static implementation
578 specialize_for_encoding(pqxx::internal::encoding_group enc);
579
581 implementation m_impl;
582
584 template<pqxx::internal::encoding_group>
585 std::pair<juncture, std::string> parse_array_step();
586
587 template<pqxx::internal::encoding_group>
588 std::string::size_type scan_double_quoted_string() const;
589 template<pqxx::internal::encoding_group>
590 std::string parse_double_quoted_string(std::string::size_type end) const;
591 template<pqxx::internal::encoding_group>
592 std::string::size_type scan_unquoted_string() const;
593 template<pqxx::internal::encoding_group>
594 std::string parse_unquoted_string(std::string::size_type end) const;
595
596 template<pqxx::internal::encoding_group>
597 std::string::size_type scan_glyph(std::string::size_type pos) const;
598 template<pqxx::internal::encoding_group>
599 std::string::size_type
600 scan_glyph(std::string::size_type pos, std::string::size_type end) const;
601};
602} // namespace pqxx
603#endif
The home of all libpqxx classes, functions, templates, etc.
Definition array.hxx:33
An SQL array received from the database.
Definition array.hxx:56
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition array.hxx:158
constexpr auto cend() const noexcept
Return end point of iteration.
Definition array.hxx:117
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition array.hxx:119
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition array.hxx:103
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition array.hxx:127
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition array.hxx:145
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition array.hxx:75
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition array.hxx:115
ELEMENT const & at(INDEX... index) const
Definition array.hxx:87
array(std::string_view data, connection const &conn)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition array.hxx:68
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition array.hxx:121
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition array.hxx:82
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition array.hxx:153
Low-level array parser.
Definition array.hxx:527
juncture
What's the latest thing found in the array?
Definition array.hxx:531
std::pair< juncture, std::string > get_next()
Parse the next step in the array.
Definition array.hxx:560
Connection to a database.
Definition connection.hxx:253
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition strconv.hxx:95