MagickWand  7.1.1-43
Convert, Edit, Or Compose Bitmap Images
script-token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % SSS CCC RRRR III PPPP TTTTT TTTTT OOO K K EEEE N N %
6 % S C R R I P P T T O O K K E NN N %
7 % SSS C RRRR I PPPP T T O O KK EEE N N N %
8 % S C R R I P T T O O K K E N NN %
9 % SSSS CCC R RR III P T T OOO K K EEEE N N %
10 % %
11 % Tokenize Magick Script into Options %
12 % %
13 % Dragon Computing %
14 % Anthony Thyssen %
15 % January 2012 %
16 % %
17 % %
18 % Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization %
19 % dedicated to making software imaging solutions freely available. %
20 % %
21 % You may not use this file except in compliance with the License. You may %
22 % obtain a copy of the License at %
23 % %
24 % https://imagemagick.org/script/license.php %
25 % %
26 % Unless required by applicable law or agreed to in writing, software %
27 % distributed under the License is distributed on an "AS IS" BASIS, %
28 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
29 % See the License for the specific language governing permissions and %
30 % limitations under the License. %
31 % %
32 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
33 %
34 % Read a stream of characters and return tokens one at a time.
35 %
36 % The input stream is divided into individual 'tokens' (representing 'words'
37 % or 'options'), in a way that is as close to a UNIX shell, as is feasible.
38 % Only shell variable, and command substitutions will not be performed.
39 % Tokens can be any length.
40 %
41 % The main function call is GetScriptToken() (see below) which returns one
42 % and only one token at a time. The other functions provide support to this
43 % function, opening scripts, and setting up the required structures.
44 %
45 % More specifically...
46 %
47 % Tokens are white space separated, and may be quoted, or even partially
48 % quoted by either single or double quotes, or the use of backslashes,
49 % or any mix of the three.
50 %
51 % For example: This\ is' a 'single" token"
52 %
53 % A token is returned immediately once the end of the token is found, that is,
54 % as soon as an unquoted white-space or EOF condition has been found. In other
55 % words, the file stream is parsed purely character-by-character, regardless of
56 % any buffering constraints set by the system. It is not parsed line-by-line.
57 %
58 % The function will return 'MagickTrue' if a valid token was found, while
59 % the token status will be set accordingly to 'OK' or 'EOF', according to
60 % the cause of the end of token. The token may be an empty string if the
61 % input was a quoted empty string. Other error conditions return a value of
62 % MagickFalse, indicating that any token found was incomplete due to some
63 % error condition.
64 %
65 % Single quotes will preserve all characters including backslashes. Double
66 % quotes will also preserve backslashes unless escaping a double quote,
67 % or another backslash. Other shell meta-characters are not treated as
68 % special by this tokenizer.
69 %
70 % For example Quoting the quote chars:
71 % \' "'" \" '"' "\"" \\ '\' "\\"
72 %
73 % Outside quotes, backslash characters will make spaces, tabs and quotes part
74 % of a token returned. However a backslash at the end of a line (and outside
75 % quotes) will cause the newline to be completely ignored (as per the shell
76 % line continuation).
77 %
78 % Comments start with a '#' character at the start of a new token, and will be
79 % completely ignored up to the end of line, regardless of any backslash at the
80 % end of the line. You can escape a comment '#', using quotes or backslashes
81 % just as you can in a shell.
82 %
83 % The parser will accept both newlines, returns, or return-newlines to mark
84 % the EOL. Though this is technically breaking (or perhaps adding to) the
85 % 'BASH' syntax that is being followed.
86 %
87 %
88 % UNIX script Launcher...
89 %
90 % The use of '#' comments allow normal UNIX 'scripting' to be used to call on
91 % the "magick" command to parse the tokens from a file
92 %
93 % #!/path/to/command/magick -script
94 %
95 %
96 % UNIX 'env' command launcher...
97 %
98 % If "magick" is renamed "magick-script" you can use a 'env' UNIX launcher
99 %
100 % #!/usr/bin/env magick-script
101 %
102 %
103 % Shell script launcher...
104 %
105 % As a special case a ':' at the start of a line is also treated as a comment
106 % This allows a magick script to ignore a line that can be parsed by the shell
107 % and not by the magick script (tokenizer). This allows for an alternative
108 % script 'launcher' to be used for magick scripts.
109 %
110 % #!/bin/sh
111 % :; exec magick -script "$0" "$@"; exit 10
112 % #
113 % # The rest of the file is magick script
114 % -read label:"This is a Magick Script!"
115 % -write show: -exit
116 %
117 % Or with some shell pre/post processing...
118 %
119 % #!/bin/sh
120 % :; echo "This part is run in the shell, but ignored by Magick"
121 % :; magick -script "$0" "$@"
122 % :; echo "This is run after the "magick" script is finished!"
123 % :; exit 10
124 % #
125 % # The rest of the file is magick script
126 % -read label:"This is a Magick Script!"
127 % -write show: -exit
128 %
129 %
130 % DOS script launcher...
131 %
132 % Similarly any '@' at the start of the line (outside of quotes) will also be
133 % treated as comment. This allows you to create a DOS script launcher, to
134 % allow ".bat" DOS scripts to run as "magick" scripts instead.
135 %
136 % @echo This line is DOS executed but ignored by Magick
137 % @magick -script %~dpnx0 %*
138 % @echo This line is processed after the Magick script is finished
139 % @GOTO :EOF
140 % #
141 % # The rest of the file is magick script
142 % -read label:"This is a Magick Script!"
143 % -write show: -exit
144 %
145 % But this can also be used as a shell script launcher as well!
146 % Though is more restrictive and less free-form than using ':'.
147 %
148 % #!/bin/sh
149 % @() { exec magick -script "$@"; }
150 % @ "$0" "$@"; exit
151 % #
152 % # The rest of the file is magick script
153 % -read label:"This is a Magick Script!"
154 % -write show: -exit
155 %
156 % Or even like this...
157 %
158 % #!/bin/sh
159 % @() { }
160 % @; exec magick -script "$0" "$@"; exit
161 % #
162 % # The rest of the file is magick script
163 % -read label:"This is a Magick Script!"
164 % -write show: -exit
165 %
166 */
167 
168 /*
169  Include declarations.
170 
171  NOTE: Do not include if being compiled into the "test/script-token-test.c"
172  module, for low level token testing.
173 */
174 #ifndef SCRIPT_TOKEN_TESTING
175 # include "MagickWand/studio.h"
176 # include "MagickWand/MagickWand.h"
177 # include "MagickWand/script-token.h"
178 # include "MagickCore/string-private.h"
179 # include "MagickCore/utility-private.h"
180 #endif
181 
182 /*
183 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
184 % %
185 % %
186 % %
187 % A c q u i r e S c r i p t T o k e n I n f o %
188 % %
189 % %
190 % %
191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
192 %
193 % AcquireScriptTokenInfo() allocates, initializes and opens the given
194 % file stream from which tokens are to be extracted.
195 %
196 % The format of the AcquireScriptTokenInfo method is:
197 %
198 % ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
199 %
200 % A description of each parameter follows:
201 %
202 % o filename the filename to open ("-" means stdin, "fd:N" means descriptor N)
203 %
204 */
205 WandExport ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
206 {
208  *token_info;
209 
210  token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
211  if (token_info == (ScriptTokenInfo *) NULL)
212  return token_info;
213  (void) memset(token_info,0,sizeof(*token_info));
214 
215  token_info->opened=MagickFalse;
216  if ( LocaleCompare(filename,"-") == 0 ) {
217  token_info->stream=stdin;
218  token_info->opened=MagickFalse;
219  }
220  else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
221  token_info->stream=fdopen(StringToLong(filename+3),"r");
222  token_info->opened=MagickFalse;
223  }
224  else {
225  token_info->stream=fopen_utf8(filename, "r");
226  }
227  if ( token_info->stream == (FILE *) NULL ) {
228  token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
229  return(token_info);
230  }
231 
232  token_info->curr_line=1;
233  token_info->length=INITAL_TOKEN_LENGTH;
234  token_info->token=(char *) AcquireQuantumMemory(1,token_info->length);
235 
236  token_info->status=(token_info->token != (char *) NULL)
237  ? TokenStatusOK : TokenStatusMemoryFailed;
238  token_info->signature=MagickWandSignature;
239 
240  return token_info;
241 }
242 
243 /*
244 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
245 % %
246 % %
247 % %
248 % D e s t r o y S c r i p t T o k e n I n f o %
249 % %
250 % %
251 % %
252 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
253 %
254 % DestroyScriptTokenInfo() closes the file stream if it is flagged as opened,
255 % then frees the token buffer and the ScriptTokenInfo structure itself.
256 %
257 % The format of the DestroyScriptTokenInfo method is:
258 %
259 % ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
260 %
261 % A description of each parameter follows:
262 %
263 % o token_info The ScriptTokenInfo structure to be destroyed
264 %
265 */
266 WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
267 {
268  assert(token_info != (ScriptTokenInfo *) NULL);
269  assert(token_info->signature == MagickWandSignature);
270 
271  if ( token_info->opened != MagickFalse )
272  fclose(token_info->stream);
273 
274  if (token_info->token != (char *) NULL )
275  token_info->token=(char *) RelinquishMagickMemory(token_info->token);
276  token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
277  return(token_info);
278 }
279 
280 /*
281 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
282 % %
283 % %
284 % %
285 % G e t S c r i p t T o k e n %
286 % %
287 % %
288 % %
289 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
290 %
291 % GetScriptToken() is a fairly general, finite-state token parser that returns
292 % tokens one at a time, as soon as possible.
293 %
294 %
295 % The format of the GetScriptToken method is:
296 %
297 % MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
298 %
299 % A description of each parameter follows:
300 %
301 % o token_info pointer to a structure holding token details
302 %
303 */
304 /* States of the parser */
305 #define IN_WHITE 0
306 #define IN_TOKEN 1
307 #define IN_QUOTE 2
308 #define IN_COMMENT 3
309 
310 /* Macro to read character from stream
311 
312  This also keeps track of the line and column counts.
313  The EOL is defined as either '\r\n', or '\r', or '\n'.
314  A '\r' on its own is converted into a '\n' to correctly handle
315  raw input, typically due to 'copy-n-paste' of text files.
316  But a '\r\n' sequence is left ASIS for string handling
317 */
318 #define GetChar(c) \
319 { \
320  c=fgetc(token_info->stream); \
321  token_info->curr_column++; \
322  if ( c == '\r' ) { \
323  c=fgetc(token_info->stream); \
324  ungetc(c,token_info->stream); \
325  c = (c!='\n')?'\n':'\r'; \
326  } \
327  if ( c == '\n' ) \
328  token_info->curr_line++, token_info->curr_column=0; \
329  if (c == EOF ) \
330  break; \
331  if ( (c>='\0' && c<'\a') || (c>'\r' && c<' ' && c!='\033') ) { \
332  token_info->status=TokenStatusBinary; \
333  break; \
334  } \
335 }
336 /* macro to collect the token characters */
337 #define SaveChar(c) \
338 { \
339  if ((size_t) offset >= (token_info->length-1)) { \
340  if (token_info == (ScriptTokenInfo *) NULL) \
341  break; \
342  if ( token_info->length >= MagickPathExtent ) \
343  token_info->length += MagickPathExtent; \
344  else \
345  token_info->length *= 4; \
346  token_info->token=(char *) ResizeQuantumMemory(token_info->token, \
347  token_info->length,sizeof(*token_info->token)); \
348  if ( token_info->token == (char *) NULL ) { \
349  token_info->status=TokenStatusMemoryFailed; \
350  break; \
351  } \
352  } \
353  if ( token_info->token == (char *) NULL ) \
354  token_info->status=TokenStatusMemoryFailed; \
355  else \
356  token_info->token[offset++]=(char) (c); \
357 }
358 
359 WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
360 {
361  int
362  quote,
363  c;
364 
365  int
366  state;
367 
368  ssize_t
369  offset;
370 
371  /* EOF - no more tokens! */
372  if (token_info == (ScriptTokenInfo *) NULL)
373  return(MagickFalse);
374  if (token_info->status != TokenStatusOK)
375  {
376  token_info->token[0]='\0';
377  return(MagickFalse);
378  }
379  state=IN_WHITE;
380  quote='\0';
381  offset=0;
382 DisableMSCWarning(4127)
383  while(1)
384 RestoreMSCWarning
385  {
386  /* get character */
387  GetChar(c);
388 
389  /* hash comment handling */
390  if ( state == IN_COMMENT ) {
391  if ( c == '\n' )
392  state=IN_WHITE;
393  continue;
394  }
395  /* comment lines start with '#' anywhere, or ':' or '@' at start of line */
396  if ( state == IN_WHITE )
397  if ( ( c == '#' ) ||
398  ( token_info->curr_column==1 && (c == ':' || c == '@' ) ) )
399  state=IN_COMMENT;
400  /* whitespace token separator character */
401  if (strchr(" \n\r\t",c) != (char *) NULL) {
402  switch (state) {
403  case IN_TOKEN:
404  token_info->token[offset]='\0';
405  return(MagickTrue);
406  case IN_QUOTE:
407  SaveChar(c);
408  break;
409  }
410  continue;
411  }
412  /* quote character */
413  if ( c=='\'' || c =='"' ) {
414  switch (state) {
415  case IN_WHITE:
416  token_info->token_line=token_info->curr_line;
417  token_info->token_column=token_info->curr_column;
418  magick_fallthrough;
419  case IN_TOKEN:
420  state=IN_QUOTE;
421  quote=c;
422  break;
423  case IN_QUOTE:
424  if (c == quote)
425  {
426  state=IN_TOKEN;
427  quote='\0';
428  }
429  else
430  SaveChar(c);
431  break;
432  }
433  continue;
434  }
435  /* escape char (preserve in quotes - unless escaping the same quote) */
436  if (c == '\\')
437  {
438  if ( state==IN_QUOTE && quote == '\'' ) {
439  SaveChar('\\');
440  continue;
441  }
442  GetChar(c);
443  if (c == '\n')
444  switch (state) {
445  case IN_COMMENT:
446  state=IN_WHITE; /* end comment */
447  magick_fallthrough;
448  case IN_QUOTE:
449  if (quote != '"')
450  break; /* in double quotes only */
451  magick_fallthrough;
452  case IN_WHITE:
453  case IN_TOKEN:
454  continue; /* line continuation - remove line feed */
455  }
456  switch (state) {
457  case IN_WHITE:
458  token_info->token_line=token_info->curr_line;
459  token_info->token_column=token_info->curr_column;
460  state=IN_TOKEN;
461  break;
462  case IN_QUOTE:
463  if (c != quote && c != '\\')
464  SaveChar('\\');
465  break;
466  }
467  SaveChar(c);
468  continue;
469  }
470  /* ordinary character */
471  switch (state) {
472  case IN_WHITE:
473  token_info->token_line=token_info->curr_line;
474  token_info->token_column=token_info->curr_column;
475  state=IN_TOKEN;
476  magick_fallthrough;
477  case IN_TOKEN:
478  case IN_QUOTE:
479  SaveChar(c);
480  break;
481  case IN_COMMENT:
482  break;
483  }
484  }
485  /* input stream has EOF or produced a fatal error */
486  token_info->token[offset]='\0';
487  if ( token_info->status != TokenStatusOK )
488  return(MagickFalse); /* fatal condition - no valid token */
489  token_info->status = TokenStatusEOF;
490  if ( state == IN_QUOTE)
491  token_info->status = TokenStatusBadQuotes;
492  if ( state == IN_TOKEN)
493  return(MagickTrue); /* token with EOF at end - no problem */
494  return(MagickFalse); /* in white space or in quotes - invalid token */
495 }
ScriptTokenInfo
Definition: script-token.h:39