MagickCore  7.1.1-43
Convert, Edit, Or Compose Bitmap Images
token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % TTTTT OOO K K EEEEE N N %
7 % T O O K K E NN N %
8 % T O O KKK EEE N N N %
9 % T O O K K E N NN %
10 % T OOO K K EEEEE N N %
11 % %
12 % %
13 % MagickCore Token Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % January 1993 %
18 % %
19 % %
20 % Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
22 % %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
25 % %
26 % https://imagemagick.org/script/license.php %
27 % %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
33 % %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 
40 /*
41  Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/image-private.h"
48 #include "MagickCore/locale-private.h"
49 #include "MagickCore/memory_.h"
50 #include "MagickCore/memory-private.h"
51 #include "MagickCore/string_.h"
52 #include "MagickCore/string-private.h"
53 #include "MagickCore/token.h"
54 #include "MagickCore/token-private.h"
55 #include "MagickCore/utility.h"
56 #include "MagickCore/utility-private.h"
57 
58 /*
59  Typedef declarations.
60 */
61 struct _TokenInfo
62 {
63  int
64  state;
65 
66  MagickStatusType
67  flag;
68 
69  ssize_t
70  offset;
71 
72  char
73  quote;
74 
75  size_t
76  signature;
77 };
78 
79 /*
80 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
81 % %
82 % %
83 % %
84 % A c q u i r e T o k e n I n f o %
85 % %
86 % %
87 % %
88 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
89 %
90 % AcquireTokenInfo() allocates the TokenInfo structure.
91 %
92 % The format of the AcquireTokenInfo method is:
93 %
94 % TokenInfo *AcquireTokenInfo()
95 %
96 */
97 MagickExport TokenInfo *AcquireTokenInfo(void)
98 {
99  TokenInfo
100  *token_info;
101 
102  token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
103  token_info->signature=MagickCoreSignature;
104  return(token_info);
105 }
106 
107 /*
108 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109 % %
110 % %
111 % %
112 % D e s t r o y T o k e n I n f o %
113 % %
114 % %
115 % %
116 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117 %
118 % DestroyTokenInfo() deallocates memory associated with a TokenInfo
119 % structure.
120 %
121 % The format of the DestroyTokenInfo method is:
122 %
123 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124 %
125 % A description of each parameter follows:
126 %
127 % o token_info: Specifies a pointer to a TokenInfo structure.
128 %
129 */
130 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131 {
132  assert(token_info != (TokenInfo *) NULL);
133  assert(token_info->signature == MagickCoreSignature);
134  if (IsEventLogging() != MagickFalse)
135  (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
136  token_info->signature=(~MagickCoreSignature);
137  token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138  return(token_info);
139 }
140 
141 /*
142 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143 % %
144 % %
145 % %
146 + G e t N e x t T o k e n %
147 % %
148 % %
149 % %
150 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151 %
152 % GetNextToken() gets a token from the token stream. A token is defined as
153 % a sequence of characters delimited by whitespace (e.g. clip-path), a
154 % sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155 % parentheses (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
156 % separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
157 % number of characters consumed from start, plus one.
158 %
159 % The format of the GetNextToken method is:
160 %
161 % size_t GetNextToken(const char *magick_restrict start,
162 % const char **magick_restrict end,const size_t extent,
163 % char *magick_restrict token)
164 %
165 % A description of each parameter follows:
166 %
167 % o start: the start of the token sequence.
168 %
169 % o end: if not NULL, set to point just past the token and any trailing whitespace.
170 %
171 % o extent: maximum extent of the token.
172 %
173 % o token: copy the token to this buffer.
174 %
175 */
176 MagickExport magick_hot_spot size_t GetNextToken(
177  const char *magick_restrict start,const char **magick_restrict end,
178  const size_t extent,char *magick_restrict token)
179 {
180  char
181  *magick_restrict q;
182 
183  const char
184  *magick_restrict p;
185 
186  double
187  value;
188 
189  ssize_t
190  i;
191 
192  assert(start != (const char *) NULL);
193  assert(token != (char *) NULL);
194  i=0;
195  p=start;
196  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197  p++;
198  switch (*p)
199  {
200  case '\0':
201  break;
202  case '"':
203  case '\'':
204  case '`':
205  case '{':
206  {
207  char
208  escape;
209 
210  switch (*p)
211  {
212  case '"': escape='"'; break;
213  case '\'': escape='\''; break;
214  case '`': escape='\''; break;
215  case '{': escape='}'; break;
216  default: escape=(*p); break;
217  }
218  for (p++; *p != '\0'; p++)
219  {
220  if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221  p++;
222  else
223  if (*p == escape)
224  {
225  p++;
226  break;
227  }
228  if (i < (ssize_t) (extent-1))
229  token[i++]=(*p);
230  if ((size_t) (p-start) >= (extent-1))
231  break;
232  }
233  break;
234  }
235  case '/':
236  {
237  if (i < (ssize_t) (extent-1))
238  token[i++]=(*p);
239  p++;
240  if ((*p == '>') || (*p == '/'))
241  {
242  if (i < (ssize_t) (extent-1))
243  token[i++]=(*p);
244  p++;
245  }
246  break;
247  }
248  default:
249  {
250  value=StringToDouble(p,&q);
251  (void) value;
252  if ((p != q) && (*p != ','))
253  {
254  for ( ; (p < q) && (*p != ','); p++)
255  {
256  if (i < (ssize_t) (extent-1))
257  token[i++]=(*p);
258  if ((size_t) (p-start) >= (extent-1))
259  break;
260  }
261  if (*p == '%')
262  {
263  if (i < (ssize_t) (extent-1))
264  token[i++]=(*p);
265  p++;
266  }
267  break;
268  }
269  if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
270  (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
271  {
272  if (i < (ssize_t) (extent-1))
273  token[i++]=(*p);
274  p++;
275  break;
276  }
277  for ( ; *p != '\0'; p++)
278  {
279  if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
280  (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
281  break;
282  if ((i > 0) && (*p == '<'))
283  break;
284  if (i < (ssize_t) (extent-1))
285  token[i++]=(*p);
286  if (*p == '>')
287  break;
288  if (*p == '(')
289  {
290  for (p++; *p != '\0'; p++)
291  {
292  if (i < (ssize_t) (extent-1))
293  token[i++]=(*p);
294  if ((*p == ')') && (*(p-1) != '\\'))
295  break;
296  if ((size_t) (p-start) >= (extent-1))
297  break;
298  }
299  if (*p == '\0')
300  break;
301  }
302  if ((size_t) (p-start) >= (extent-1))
303  break;
304  }
305  break;
306  }
307  }
308  token[i]='\0';
309  if (LocaleNCompare(token,"url(#",5) == 0)
310  {
311  q=strrchr(token,')');
312  if (q != (char *) NULL)
313  {
314  *q='\0';
315  (void) memmove(token,token+5,(size_t) (q-token-4));
316  }
317  }
318  while (isspace((int) ((unsigned char) *p)) != 0)
319  p++;
320  if (end != (const char **) NULL)
321  *end=(const char *) p;
322  return((size_t) (p-start+1));
323 }
324 
325 /*
326 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
327 % %
328 % %
329 % %
330 % G l o b E x p r e s s i o n %
331 % %
332 % %
333 % %
334 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335 %
336 % GlobExpression() returns MagickTrue if the expression matches the pattern.
337 %
338 % The format of the GlobExpression function is:
339 %
340 % MagickBooleanType GlobExpression(const char *magick_restrict expression,
341 % const char *magick_restrict pattern,
342 % const MagickBooleanType case_insensitive)
343 %
344 % A description of each parameter follows:
345 %
346 % o expression: Specifies a pointer to a text string containing a file name.
347 %
348 % o pattern: Specifies a pointer to a text string containing a pattern.
349 %
350 % o case_insensitive: set to MagickTrue to ignore the case when matching
351 % an expression.
352 %
353 */
354 MagickExport MagickBooleanType GlobExpression(
355  const char *magick_restrict expression,const char *magick_restrict pattern,
356  const MagickBooleanType case_insensitive)
357 {
358  char
359  path[MagickPathExtent];
360 
361  MagickBooleanType
362  done,
363  match;
364 
365  /*
366  Return on empty pattern or '*'.
367  */
368  if (pattern == (char *) NULL)
369  return(MagickTrue);
370  if (GetUTFCode(pattern) == 0)
371  return(MagickTrue);
372  if (LocaleCompare(pattern,"*") == 0)
373  return(MagickTrue);
374  GetPathComponent(pattern,SubimagePath,path);
375  if (*path != '\0')
376  return(MagickFalse);
377  /*
378  Evaluate glob expression.
379  */
380  done=MagickFalse;
381  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
382  {
383  if (GetUTFCode(expression) == 0)
384  if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
385  break;
386  switch (GetUTFCode(pattern))
387  {
388  case '*':
389  {
390  MagickBooleanType
391  status;
392 
393  status=MagickFalse;
394  while (GetUTFCode(pattern) == '*')
395  pattern+=GetUTFOctets(pattern);
396  while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
397  {
398  status=GlobExpression(expression,pattern,case_insensitive);
399  expression+=GetUTFOctets(expression);
400  }
401  if (status != MagickFalse)
402  {
403  while (GetUTFCode(expression) != 0)
404  expression+=GetUTFOctets(expression);
405  while (GetUTFCode(pattern) != 0)
406  pattern+=GetUTFOctets(pattern);
407  }
408  break;
409  }
410  case '[':
411  {
412  int
413  c;
414 
415  pattern+=GetUTFOctets(pattern);
416  for ( ; ; )
417  {
418  if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
419  {
420  done=MagickTrue;
421  break;
422  }
423  if (GetUTFCode(pattern) == '\\')
424  {
425  pattern+=GetUTFOctets(pattern);
426  if (GetUTFCode(pattern) == 0)
427  {
428  done=MagickTrue;
429  break;
430  }
431  }
432  if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
433  {
434  c=GetUTFCode(pattern);
435  pattern+=GetUTFOctets(pattern);
436  pattern+=GetUTFOctets(pattern);
437  if (GetUTFCode(pattern) == ']')
438  {
439  done=MagickTrue;
440  break;
441  }
442  if (GetUTFCode(pattern) == '\\')
443  {
444  pattern+=GetUTFOctets(pattern);
445  if (GetUTFCode(pattern) == 0)
446  {
447  done=MagickTrue;
448  break;
449  }
450  }
451  if ((GetUTFCode(expression) < c) ||
452  (GetUTFCode(expression) > GetUTFCode(pattern)))
453  {
454  pattern+=GetUTFOctets(pattern);
455  continue;
456  }
457  }
458  else
459  if (GetUTFCode(pattern) != GetUTFCode(expression))
460  {
461  pattern+=GetUTFOctets(pattern);
462  continue;
463  }
464  pattern+=GetUTFOctets(pattern);
465  while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
466  {
467  if ((GetUTFCode(pattern) == '\\') &&
468  (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
469  pattern+=GetUTFOctets(pattern);
470  pattern+=GetUTFOctets(pattern);
471  }
472  if (GetUTFCode(pattern) != 0)
473  {
474  pattern+=GetUTFOctets(pattern);
475  expression+=GetUTFOctets(expression);
476  }
477  break;
478  }
479  break;
480  }
481  case '?':
482  {
483  pattern+=GetUTFOctets(pattern);
484  expression+=GetUTFOctets(expression);
485  break;
486  }
487  case '{':
488  {
489  char
490  *target;
491 
492  char
493  *p;
494 
495  target=AcquireString(pattern);
496  p=target;
497  pattern++;
498  while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
499  {
500  *p++=(*pattern++);
501  if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
502  {
503  *p='\0';
504  match=GlobExpression(expression,target,case_insensitive);
505  if (match != MagickFalse)
506  {
507  expression+=MagickMin(strlen(expression),strlen(target));
508  break;
509  }
510  p=target;
511  pattern+=GetUTFOctets(pattern);
512  }
513  }
514  while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
515  pattern+=GetUTFOctets(pattern);
516  if (GetUTFCode(pattern) != 0)
517  pattern+=GetUTFOctets(pattern);
518  target=DestroyString(target);
519  break;
520  }
521  case '\\':
522  {
523  pattern+=GetUTFOctets(pattern);
524  if (GetUTFCode(pattern) == 0)
525  break;
526  magick_fallthrough;
527  }
528  default:
529  {
530  if (case_insensitive != MagickFalse)
531  {
532  if (LocaleToLowercase((int) GetUTFCode(expression)) != LocaleToLowercase((int) GetUTFCode(pattern)))
533  {
534  done=MagickTrue;
535  break;
536  }
537  }
538  else
539  if (GetUTFCode(expression) != GetUTFCode(pattern))
540  {
541  done=MagickTrue;
542  break;
543  }
544  expression+=GetUTFOctets(expression);
545  pattern+=GetUTFOctets(pattern);
546  }
547  }
548  }
549  while (GetUTFCode(pattern) == '*')
550  pattern+=GetUTFOctets(pattern);
551  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
552  MagickTrue : MagickFalse;
553  return(match);
554 }
555 
556 /*
557 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
558 % %
559 % %
560 % %
561 + I s G l o b %
562 % %
563 % %
564 % %
565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
566 %
567 % IsGlob() returns MagickTrue if the path specification contains a globbing
568 % pattern.
569 %
570 % The format of the IsGlob method is:
571 %
572 % MagickBooleanType IsGlob(const char *path)
573 %
574 % A description of each parameter follows:
575 %
576 % o path: the path.
577 %
578 */
579 MagickPrivate MagickBooleanType IsGlob(const char *path)
580 {
581  MagickBooleanType
582  status = MagickFalse;
583 
584  const char
585  *p;
586 
587  if (IsPathAccessible(path) != MagickFalse)
588  return(MagickFalse);
589  for (p=path; *p != '\0'; p++)
590  {
591  switch (*p)
592  {
593  case '*':
594  case '?':
595  case '{':
596  case '}':
597  case '[':
598  case ']':
599  {
600  status=MagickTrue;
601  break;
602  }
603  default:
604  break;
605  }
606  }
607  return(status);
608 }
609 
610 /*
611 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
612 % %
613 % %
614 % %
615 % T o k e n i z e r %
616 % %
617 % %
618 % %
619 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
620 %
621 % Tokenizer() is a generalized, finite state token parser. It extracts tokens
622 % one at a time from a string of characters. The characters used for white
623 % space, for break characters, and for quotes can be specified. Also,
624 % characters in the string can be preceded by a specifiable escape character
625 % which removes any special meaning the character may have.
626 %
627 % Here is some terminology:
628 %
629 % o token: A single unit of information in the form of a group of
630 % characters.
631 %
632 % o white space: Space that gets ignored (except within quotes or when
633 % escaped), like blanks and tabs. In addition, white space terminates a
634 % non-quoted token.
635 %
636 % o break set: One or more characters that separates non-quoted tokens.
637 % Commas are a common break character. The usage of break characters to
638 % signal the end of a token is the same as that of white space, except
639 % multiple break characters with nothing or only white space between
640 % generate a null token for each two break characters together.
641 %
642 % For example, if blank is set to be the white space and comma is set to
643 % be the break character, the line
644 %
645 % A, B, C , , DEF
646 %
647 % ... consists of 5 tokens:
648 %
649 % 1) "A"
650 % 2) "B"
651 % 3) "C"
652 % 4) "" (the null string)
653 % 5) "DEF"
654 %
655 % o Quote character: A character that, when surrounding a group of other
656 % characters, causes the group of characters to be treated as a single
657 % token, no matter how many white spaces or break characters exist in
658 % the group. Also, a token always terminates after the closing quote.
659 % For example, if ' is the quote character, blank is white space, and
660 % comma is the break character, the following string
661 %
662 % A, ' B, CD'EF GHI
663 %
664 % ... consists of 4 tokens:
665 %
666 % 1) "A"
667 % 2) " B, CD" (note the blanks & comma)
668 % 3) "EF"
669 % 4) "GHI"
670 %
671 % The quote characters themselves do not appear in the resultant
672 % tokens. The double quotes are delimiters I use here for
673 % documentation purposes only.
674 %
675 % o Escape character: A character which itself is ignored but which
676 % causes the next character to be used as is. ^ and \ are often used
677 % as escape characters. An escape in the last position of the string
678 % gets treated as a "normal" (i.e., non-quote, non-white, non-break,
679 % and non-escape) character. For example, assume white space, break
680 % character, and quote are the same as in the above examples, and
681 % further, assume that ^ is the escape character. Then, in the string
682 %
683 % ABC, ' DEF ^' GH' I ^ J K^ L ^
684 %
685 % ... there are 7 tokens:
686 %
687 % 1) "ABC"
688 % 2) " DEF ' GH"
689 % 3) "I"
690 % 4) " " (a lone blank)
691 % 5) "J"
692 % 6) "K L"
693 % 7) "^" (passed as is at end of line)
694 %
695 % The format of the Tokenizer method is:
696 %
697 % int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
698 % const size_t max_token_length,const char *line,const char *white,
699 % const char *break_set,const char *quote,const char escape,
700 % char *breaker,int *next,char *quoted)
701 %
702 % A description of each parameter follows:
703 %
704 % o flag: right now, only the low order 2 bits are used.
705 %
706 % 1 => convert non-quoted tokens to upper case
707 % 2 => convert non-quoted tokens to lower case
708 % 0 => do not convert non-quoted tokens
709 %
710 % o token: a character string containing the returned next token
711 %
712 % o max_token_length: the maximum size of "token". Characters beyond
713 % "max_token_length" are truncated.
714 %
715 % o line: the string to be parsed.
716 %
717 % o white: a string of the valid white spaces. example:
718 %
719 % char whitesp[]={" \t"};
720 %
721 % blank and tab will be valid white space.
722 %
723 % o break_set: a string of the valid break characters. example:
724 %
725 % char breakch[]={";,"};
726 %
727 % semicolon and comma will be valid break characters.
728 %
729 % o quote: a string of the valid quote characters. An example would be
730 %
731 % char quotech[]={"'\""};
732 %
733 % (this causes single and double quotes to be valid) Note that a
734 % token starting with one of these characters needs the same quote
735 % character to terminate it.
736 %
737 % for example:
738 %
739 % "ABC '
740 %
741 % is unterminated, but
742 %
743 % "DEF" and 'GHI'
744 %
745 % are properly terminated. Note that different quote characters
746 % can appear on the same line; only for a given token do the quote
747 % characters have to be the same.
748 %
749 % o escape: the escape character (NOT a string ... only one
750 % allowed). Use zero if none is desired.
751 %
752 % o breaker: the break character used to terminate the current
753 % token. If the token was quoted, this will be the quote used. If
754 % the token is the last one on the line, this will be zero.
755 %
756 % o next: this variable points to the first character of the
757 % next token. it gets reset by "tokenizer" as it steps through the
758 % string. Set it to 0 upon initialization, and leave it alone
759 % after that. You can change it if you want to jump around in the
760 % string or re-parse from the beginning, but be careful.
761 %
762 % o quoted: set to MagickTrue if the token was quoted and MagickFalse
763 % if not. You may need this information (for example: in C, a
764 % string with quotes around it is a character string, while one
765 % without is an identifier).
766 %
767 % o result: 0 if we haven't reached EOS (end of string), and 1
768 % if we have.
769 %
770 */
771 
/*
  Tokenizer() parser states.
*/
enum
{
  IN_WHITE = 0,  /* no token character seen yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token body complete; white space is skipped until the token ends */
};
776 
/*
  Return the zero-based position of the first occurrence of c in string, or
  -1 when c does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
787 
788 static void StoreToken(TokenInfo *token_info,char *string,
789  size_t max_token_length,int c)
790 {
791  ssize_t
792  i;
793 
794  if ((token_info->offset < 0) ||
795  ((size_t) token_info->offset >= (max_token_length-1)))
796  return;
797  i=token_info->offset++;
798  string[i]=(char) c;
799  if (token_info->state == IN_QUOTE)
800  return;
801  switch (token_info->flag & 0x03)
802  {
803  case 1:
804  {
805  string[i]=(char) LocaleToUppercase(c);
806  break;
807  }
808  case 2:
809  {
810  string[i]=(char) LocaleToLowercase(c);
811  break;
812  }
813  default:
814  break;
815  }
816 }
817 
818 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
819  char *token,const size_t max_token_length,const char *line,const char *white,
820  const char *break_set,const char *quote,const char escape,char *breaker,
821  int *next,char *quoted)
822 {
823  int
824  c;
825 
826  ssize_t
827  i;
828 
829  *breaker='\0';
830  *quoted='\0';
831  if (line[*next] == '\0')
832  return(1);
833  token_info->state=IN_WHITE;
834  token_info->quote=(char) MagickFalse;
835  token_info->flag=flag;
836  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
837  {
838  c=(int) line[*next];
839  i=sindex(c,break_set);
840  if (i >= 0)
841  {
842  switch (token_info->state)
843  {
844  case IN_WHITE:
845  case IN_TOKEN:
846  case IN_OZONE:
847  {
848  (*next)++;
849  *breaker=break_set[i];
850  token[token_info->offset]='\0';
851  return(0);
852  }
853  case IN_QUOTE:
854  {
855  StoreToken(token_info,token,max_token_length,c);
856  break;
857  }
858  }
859  continue;
860  }
861  i=sindex(c,quote);
862  if (i >= 0)
863  {
864  switch (token_info->state)
865  {
866  case IN_WHITE:
867  {
868  token_info->state=IN_QUOTE;
869  token_info->quote=quote[i];
870  *quoted=(char) MagickTrue;
871  break;
872  }
873  case IN_QUOTE:
874  {
875  if (quote[i] != token_info->quote)
876  StoreToken(token_info,token,max_token_length,c);
877  else
878  {
879  token_info->state=IN_OZONE;
880  token_info->quote='\0';
881  }
882  break;
883  }
884  case IN_TOKEN:
885  case IN_OZONE:
886  {
887  *breaker=(char) c;
888  token[token_info->offset]='\0';
889  return(0);
890  }
891  }
892  continue;
893  }
894  i=sindex(c,white);
895  if (i >= 0)
896  {
897  switch (token_info->state)
898  {
899  case IN_WHITE:
900  case IN_OZONE:
901  break;
902  case IN_TOKEN:
903  {
904  token_info->state=IN_OZONE;
905  break;
906  }
907  case IN_QUOTE:
908  {
909  StoreToken(token_info,token,max_token_length,c);
910  break;
911  }
912  }
913  continue;
914  }
915  if (c == (int) escape)
916  {
917  if (line[(*next)+1] == '\0')
918  {
919  *breaker='\0';
920  StoreToken(token_info,token,max_token_length,c);
921  (*next)++;
922  token[token_info->offset]='\0';
923  return(0);
924  }
925  switch (token_info->state)
926  {
927  case IN_WHITE:
928  {
929  (*next)--;
930  token_info->state=IN_TOKEN;
931  break;
932  }
933  case IN_TOKEN:
934  case IN_QUOTE:
935  {
936  (*next)++;
937  c=(int) line[*next];
938  StoreToken(token_info,token,max_token_length,c);
939  break;
940  }
941  case IN_OZONE:
942  {
943  token[token_info->offset]='\0';
944  return(0);
945  }
946  }
947  continue;
948  }
949  switch (token_info->state)
950  {
951  case IN_WHITE:
952  {
953  token_info->state=IN_TOKEN;
954  StoreToken(token_info,token,max_token_length,c);
955  break;
956  }
957  case IN_TOKEN:
958  case IN_QUOTE:
959  {
960  StoreToken(token_info,token,max_token_length,c);
961  break;
962  }
963  case IN_OZONE:
964  {
965  token[token_info->offset]='\0';
966  return(0);
967  }
968  }
969  }
970  token[token_info->offset]='\0';
971  return(0);
972 }
_TokenInfo
Definition: token.c:61