6 #include "evhtp/parser.h"
7 #include "evhtp/config.h"
10 #if '\n' != '\x0a' || 'A' != 65
11 #error "You have somehow found a non-ASCII host. We can't build here."
/* Size limit for the parser's scratch buffer: HTP_SET_BUF compares
 * buf_idx + 1 against this value before appending a byte. */
#define PARSER_STACK_MAX 8192

/* Line-feed (10) and carriage-return (13) as unsigned char values.
 * The replacement lists are fully parenthesised so that the cast stays
 * attached to its operand wherever the macro is expanded (for example
 * `sizeof LF` would otherwise fail to parse). */
#define LF               ((unsigned char)10)
#define CR               ((unsigned char)13)

/* The CR LF byte pair as a string literal. */
#define CRLF             "\x0d\x0a"
125 "struct htparser {\n"
126 " htpparse_error = %d\n"
127 " parser_state = %d\n"
128 " parser_flags = %d\n"
129 " eval_hdr_val = %d\n"
136 " content_len = %zu\n"
138 " bytes_read = %zu\n"
139 " total_read = %zu\n"
141 " status_count = %d\n"
142 " scheme_offset = %s\n"
143 " host_offset = %s\n"
144 " port_offset = %s\n"
145 " path_offset = %s\n"
146 " args_offset = %s\n"
/* Variant of log_htparser__s_ with an empty replacement list: every
 * use expands to nothing and the argument is discarded unevaluated.
 * NOTE(review): presumably the definition selected when parser debug
 * logging is compiled out -- confirm against the enclosing #if. */
178 #define log_htparser__s_(p)
193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
195 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
196 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
197 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
198 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
199 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
200 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
204 "htparse_error_none",
205 "htparse_error_too_big",
206 "htparse_error_invalid_method",
207 "htparse_error_invalid_requestline",
208 "htparse_error_invalid_schema",
209 "htparse_error_invalid_protocol",
210 "htparse_error_invalid_version",
211 "htparse_error_invalid_header",
212 "htparse_error_invalid_chunk_size",
213 "htparse_error_invalid_chunk",
214 "htparse_error_invalid_state",
215 "htparse_error_user",
216 "htparse_error_unknown"
/* Evaluates to the smaller of a and b (b when they compare equal).
 * This is a function-like macro: whichever argument is selected is
 * evaluated a second time, so do not pass expressions that have side
 * effects. */
238 #define _MIN_READ(a, b) ((a) < (b) ? (a) : (b))
240 #ifndef HOST_BIG_ENDIAN
/*
 * Word-at-a-time string comparisons used when HOST_BIG_ENDIAN is not
 * defined.  Each macro loads the first four bytes of `m` as one
 * uint32_t and compares it with c0..c3 packed least-significant byte
 * first.
 *
 * All three variants compare all four bytes: unlike the byte-wise
 * definitions of the same names, _str3_cmp does test c3 and _str3Ocmp
 * does test c1 here.
 *
 * `m` must point to at least four readable bytes.  The expansion and
 * every argument are parenthesised so the macros behave as a single
 * expression (e.g. under `!`) and accept arbitrary argument
 * expressions.
 *
 * NOTE(review): the uint32_t load assumes the target tolerates
 * unaligned, type-punned reads of a char buffer; callers pass
 * pointers such as `p->buf + 1`.
 */
#define _str3_cmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)))

#define _str3Ocmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)))

#define _str4cmp(m, c0, c1, c2, c3) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)))
251 #define _str5cmp(m, c0, c1, c2, c3, c4) \
252 *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
/*
 * Word-at-a-time comparisons of six to eight bytes, used when
 * HOST_BIG_ENDIAN is not defined.  The first uint32_t of `m` is
 * compared with c0..c3 and the second with c4.. packed
 * least-significant byte first.
 *
 *  - _str6cmp masks the second word to its low 16 bits, so only bytes
 *    4 and 5 take part, although a full second word is still read.
 *  - _str7_cmp and _str8cmp both compare the entire second word, so
 *    _str7_cmp does test c7 here (callers pass '\0' for it).
 *
 * `m` must point to at least eight readable bytes.  The expansion and
 * every argument are parenthesised so the macros behave as a single
 * expression (e.g. under `!`).
 *
 * NOTE(review): the uint32_t loads assume the target tolerates
 * unaligned, type-punned reads of a char buffer.
 */
#define _str6cmp(m, c0, c1, c2, c3, c4, c5) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)) \
     && (((const uint32_t *)(m))[1] & 0xffff) == (uint32_t)(((c5) << 8) | (c4)))

#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)) \
     && ((const uint32_t *)(m))[1] == (uint32_t)(((c7) << 24) | ((c6) << 16) | ((c5) << 8) | (c4)))

#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
    (*(const uint32_t *)(m) == (uint32_t)(((c3) << 24) | ((c2) << 16) | ((c1) << 8) | (c0)) \
     && ((const uint32_t *)(m))[1] == (uint32_t)(((c7) << 24) | ((c6) << 16) | ((c5) << 8) | (c4)))
267 #define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \
268 *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
269 && ((uint32_t *)m)[1] == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4) \
/*
 * Byte-by-byte definitions of the string comparison macros.
 * NOTE(review): presumably the alternative branch of the
 * HOST_BIG_ENDIAN check above -- confirm against the #else that
 * precedes this block.
 *
 * Each macro is true when the leading bytes of `m` equal the given
 * characters.  Three of them deliberately skip one argument:
 *   - _str3_cmp  ignores c3 (only m[0..2] are compared),
 *   - _str3Ocmp  ignores c1 (m[1] is not compared),
 *   - _str7_cmp  ignores c7 (only m[0..6] are compared).
 *
 * The expansion and every argument are parenthesised so the macros
 * behave as a single expression (e.g. under `!`) and accept arbitrary
 * argument expressions.  `m` is evaluated once per compared byte.
 */
#define _str3_cmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2))

#define _str3Ocmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str4cmp(m, c0, c1, c2, c3) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str5cmp(m, c0, c1, c2, c3, c4) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3) \
     && (m)[4] == (c4))

#define _str6cmp(m, c0, c1, c2, c3, c4, c5) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3) \
     && (m)[4] == (c4) && (m)[5] == (c5))

#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3) \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6))

#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3) \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7))

#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3) \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7) \
     && (m)[8] == (c8))
303 #define __HTPARSE_GENHOOK(__n) \
304 static inline int hook_ ## __n ## _run(htparser * p, htparse_hooks * hooks) { \
305 log_debug("enter"); \
306 if (hooks && (hooks)->__n) \
308 return (hooks)->__n(p); \
314 #define __HTPARSE_GENDHOOK(__n) \
315 static inline int hook_ ## __n ## _run(htparser * p, \
316 htparse_hooks * hooks, \
317 const char * s, size_t l) { \
318 log_debug("enter"); \
319 if (hooks && (hooks)->__n) \
321 return (hooks)->__n(p, s, l); \
348 static inline uint64_t
354 while (n && isblank(str[n - 1]))
366 for (value = 0; n--; str++)
370 if (*str < '0' || *str >
'9')
376 check = value * 10 + (*str -
'0');
378 if ((value && check <= value))
390 static inline ssize_t
400 for (value = 0; n--; str++)
402 if (*str < '0' || *str >
'9')
407 value = value * 10 + (*str -
'0');
410 if (value > INTMAX_MAX)
431 if (e > htparse_error_generic)
433 return "htparse_no_such_error";
483 if (meth >= htp_method_UNKNOWN)
567 memset(p, 0, offsetof(
htparser, buf));
570 p->
error = htparse_error_none;
571 p->
method = htp_method_UNKNOWN;
584 char c = (
unsigned char)(ch | 0x20);
586 if (c >=
'a' && c <=
'z')
591 if ((ch >=
'0' && ch <=
'9') || ch ==
'.' || ch ==
'-')
606 return htp_method_GET;
611 return htp_method_PUT;
620 return htp_method_POST;
625 return htp_method_COPY;
630 return htp_method_MOVE;
635 return htp_method_LOCK;
638 if (
_str4cmp(m,
'H',
'E',
'A',
'D'))
640 return htp_method_HEAD;
646 if (
_str5cmp(m,
'M',
'K',
'C',
'O',
'L'))
648 return htp_method_MKCOL;
651 if (
_str5cmp(m,
'T',
'R',
'A',
'C',
'E'))
653 return htp_method_TRACE;
656 if (
_str5cmp(m,
'P',
'A',
'T',
'C',
'H'))
658 return htp_method_PATCH;
663 if (
_str6cmp(m,
'D',
'E',
'L',
'E',
'T',
'E'))
665 return htp_method_DELETE;
668 if (
_str6cmp(m,
'U',
'N',
'L',
'O',
'C',
'K'))
670 return htp_method_UNLOCK;
675 if (
_str7_cmp(m,
'O',
'P',
'T',
'I',
'O',
'N',
'S',
'\0'))
677 return htp_method_OPTIONS;
680 if (
_str7_cmp(m,
'C',
'O',
'N',
'N',
'E',
'C',
'T',
'\0'))
682 return htp_method_CONNECT;
687 if (
_str8cmp(m,
'P',
'R',
'O',
'P',
'F',
'I',
'N',
'D'))
689 return htp_method_PROPFIND;
695 if (
_str9cmp(m,
'P',
'R',
'O',
'P',
'P',
'A',
'T',
'C',
'H'))
697 return htp_method_PROPPATCH;
703 return htp_method_UNKNOWN;
706 #define HTP_SET_BUF(CH) do { \
707 if (evhtp_likely((p->buf_idx + 1) < PARSER_STACK_MAX)) { \
708 p->buf[p->buf_idx++] = CH; \
709 p->buf[p->buf_idx] = '\0'; \
711 p->error = htparse_error_too_big; \
727 p->
error = htparse_error_none;
730 for (i = 0; i < len; i++)
737 log_debug(
"[%p] data[%zu] = %c (%x)", p, i, isprint(ch) ? ch :
' ', ch);
746 if (ch ==
CR || ch ==
LF)
751 if ((ch < 'A' || ch >
'Z') && ch !=
'_')
753 p->
error = htparse_error_inval_reqline;
755 log_debug(
"s_start invalid fist char '%c'", ch);
763 p->
error = htparse_error_none;
764 p->
method = htp_method_UNKNOWN;
779 res = hook_on_msg_begin_run(p, hooks);
785 }
else if (p->
type == htp_type_response && ch ==
'H') {
788 log_debug(
"not type of request or response?");
791 p->
error = htparse_error_inval_reqline;
797 p->
error = htparse_error_user;
810 res = hook_method_run(p, hooks, p->
buf, p->
buf_idx);
817 p->
error = htparse_error_user;
823 if ((ch < 'A' || ch >
'Z') && ch !=
'_')
825 p->
error = htparse_error_inval_method;
837 log_debug(
"[%p] s_spaces_before_uri", p);
846 if (p->
method == htp_method_CONNECT)
861 p->
error = htparse_error_inval_reqline;
889 c = (
unsigned char)(ch | 0x20);
891 if (c >=
'a' && c <=
'z') {
900 p->
error = htparse_error_inval_reqline;
910 c = (
unsigned char)(ch | 0x20);
912 if (c >=
'a' && c <=
'z') {
919 p->
scheme = htp_scheme_unknown;
925 p->
scheme = htp_scheme_ftp;
931 p->
scheme = htp_scheme_nfs;
939 p->
scheme = htp_scheme_http;
946 p->
scheme = htp_scheme_https;
952 res = hook_scheme_run(p, hooks,
961 p->
error = htparse_error_user;
967 p->
error = htparse_error_inval_schema;
982 p->
error = htparse_error_inval_schema;
987 log_debug(
"[%p] s_schema_slash_slash", p);
997 p->
error = htparse_error_inval_schema;
1016 res = hook_host_run(p, hooks,
1022 p->
error = htparse_error_user;
1046 p->
error = htparse_error_inval_state;
1063 p->
error = htparse_error_inval_schema;
1069 p->
error = htparse_error_user;
1075 c = (
unsigned char)(ch | 0x20);
1077 if ((c >=
'a' && c <=
'f')
1078 || (ch >=
'0' && ch <=
'9')
1090 p->
error = htparse_error_user;
1099 p->
error = htparse_error_inval_schema;
1104 if (ch >=
'0' && ch <=
'9') {
1119 p->
error = htparse_error_inval_state;
1135 p->
error = htparse_error_inval_reqline;
1144 p->
error = htparse_error_user;
1150 log_debug(
"[%p] s_after_slash_in_uri", p);
1154 if (
usual[ch >> 5] & (1 << (ch & 0x1f)))
1167 int r2 = hook_uri_run(p, hooks, p->
buf, p->
buf_idx);
1213 p->
error = htparse_error_user;
1226 if (
usual[ch >> 5] & (1 << (ch & 0x1f))) {
1250 r2 = hook_uri_run(p, hooks, p->
buf, p->
buf_idx);
1277 res = hook_path_run(p, hooks,
1294 p->
error = htparse_error_user;
1306 if (
usual[ch >> 5] & (1 << (ch & 0x1f))) {
1371 p->
error = htparse_error_user;
1398 p->
error = htparse_error_inval_proto;
1411 p->
error = htparse_error_inval_proto;
1421 p->
error = htparse_error_inval_proto;
1431 p->
error = htparse_error_inval_proto;
1441 p->
error = htparse_error_inval_proto;
1446 if (ch < '1' || ch >
'9')
1448 p->
error = htparse_error_inval_ver;
1452 p->
major = ch -
'0';
1462 if (ch < '0' || ch >
'9')
1464 p->
error = htparse_error_inval_ver;
1471 if (ch < '0' || ch >
'9')
1473 p->
error = htparse_error_inval_ver;
1477 p->
minor = ch -
'0';
1486 }
else if (p->
type == htp_type_response)
1497 p->
error = htparse_error_inval_reqline;
1498 log_debug(
"[s_minor_digit] LF without CR!");
1503 if (ch < '0' || ch >
'9')
1505 p->
error = htparse_error_inval_ver;
1524 if (ch < '0' || ch >
'9')
1526 p->
error = htparse_error_status;
1550 p->
error = htparse_error_generic;
1577 p->
error = htparse_error_inval_ver;
1587 res = hook_on_hdrs_begin_run(p, hooks);
1591 p->
error = htparse_error_user;
1602 res = hook_on_hdrs_begin_run(p, hooks);
1605 p->
error = htparse_error_user;
1610 p->
error = htparse_error_inval_reqline;
1654 res = hook_hdr_key_run(p, hooks, p->
buf, p->
buf_idx);
1661 if (!strcasecmp(p->
buf,
"host"))
1667 if (!strcasecmp(p->
buf,
"connection"))
1673 if (!strcasecmp(p->
buf,
"content-type"))
1679 if (!strcasecmp(p->
buf,
"content-length"))
1685 if (!strcasecmp(p->
buf,
"proxy-connection"))
1691 if (!strcasecmp(p->
buf,
"transfer-encoding"))
1703 p->
error = htparse_error_user;
1733 log_debug(
"[%p] s_hdrline_hdr_space_before_val", p);
1757 p->
error = htparse_error_inval_hdr;
1776 if (hook_hostname_run(p, hooks, p->
buf, p->
buf_idx))
1779 p->
error = htparse_error_user;
1792 p->
error = htparse_error_too_big;
1798 switch (p->
buf[0]) {
1810 A_case = (p->
buf[5] ==
'A') ?
'A' :
'a';
1811 S_buf = (
const char *)(p->
buf + 1);
1814 'e',
'e',
'p',
'-', A_case,
'l',
'i',
'v',
'e'))
1826 C_case = (p->
buf[0] ==
'C') ?
'C' :
'c';
1827 S_buf = (
const char *)p->
buf;
1829 if (
_str5cmp(S_buf, C_case,
'l',
'o',
's',
'e'))
1842 switch (p->
buf[0]) {
1852 S_buf = (
const char *)(p->
buf + 1);
1854 if (
_str6cmp(S_buf,
'h',
'u',
'n',
'k',
'e',
'd'))
1869 switch (p->
buf[0]) {
1874 S_buf = (
const char *)(p->
buf + 1);
1876 if (
_str8cmp(S_buf,
'u',
'l',
't',
'i',
'p',
'a',
'r',
't'))
1898 p->
error = htparse_error_inval_hdr;
1915 log_debug(
"[%p] s_hdrline_hdr_almost_done", p);
1922 res = hook_on_msg_complete_run(p, hooks);
1930 p->
error = htparse_error_inval_hdr;
1936 p->
error = htparse_error_user;
1942 log_debug(
"[%p] s_hdrline_hdr_done", p);
1946 res = hook_hdr_val_run(p, hooks, p->
buf, p->
buf_idx);
1951 p->
error = htparse_error_user;
1958 p->
error = htparse_error_inval_hdr;
1967 res = hook_hdr_val_run(p, hooks, p->
buf, p->
buf_idx);
1975 p->
error = htparse_error_user;
1983 log_debug(
"[%p] s_hdrline_almost_done", p);
1987 res = hook_on_hdrs_complete_run(p, hooks);
1991 p->
error = htparse_error_user;
1999 res = hook_on_msg_complete_run(p, hooks);
2009 res = hook_on_msg_complete_run(p, hooks);
2017 p->
error = htparse_error_user;
2023 p->
error = htparse_error_inval_hdr;
2029 p->
error = htparse_error_user;
2041 res = hook_on_msg_complete_run(p, hooks);
2053 res = hook_on_msg_complete_run(p, hooks);
2059 p->
error = htparse_error_user;
2065 c =
unhex[(
unsigned char)ch];
2069 p->
error = htparse_error_inval_chunk_sz;
2083 c =
unhex[(
unsigned char)ch];
2087 p->
error = htparse_error_inval_chunk_sz;
2098 p->
error = htparse_error_inval_chunk_sz;
2106 res = hook_on_chunks_complete_run(p, hooks);
2111 res = hook_on_new_chunk_run(p, hooks);
2118 p->
error = htparse_error_user;
2127 const char * pp = &data[i];
2128 const char * pe = (
const char *)(data + len);
2133 res = hook_body_run(p, hooks, pp, to_read);
2148 p->
error = htparse_error_user;
2157 p->
error = htparse_error_inval_chunk;
2167 p->
error = htparse_error_inval_chunk;
2174 if (hook_on_chunk_complete_run(p, hooks))
2176 p->
error = htparse_error_user;
2186 const char * pp = &data[i];
2187 const char * pe = (
const char *)(data + len);
2192 res = hook_body_run(p, hooks, pp, to_read);
2200 res = hook_on_msg_complete_run(p, hooks);
2206 p->
error = htparse_error_user;
2214 log_debug(
"[%p] This is a silly state....", p);
2215 p->
error = htparse_error_inval_state;