/*
 * rxp.c
 *
 * Source listing taken from the Edinburgh Speech Tools 2.4-release
 * documentation.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "charset.h"
#include "string16.h"
#include "dtd.h"
#include "input.h"
#include "xmlparser.h"
#include "stdio16.h"
10
11int attr_compare(const void *a, const void *b);
12void print_tree(Parser p, XBit bit);
13void print_bit(Parser p, XBit bit);
14void print_attrs(ElementDefinition e, Attribute a);
15void print_text(Char *text);
16void print_text_bit(Char *text);
17void dtd_cb(XBit bit, void *arg);
18InputSource entity_open(Entity ent, void *arg);
19
20int verbose = 0, expand = 0, bits = 0, silent = 0, nsgml = 0,
21 attr_defaults = 0, merge = 0, strict_xml = 0, tree = 0;
22char *enc_name = 0;
23CharacterEncoding encoding = CE_unknown;
24InputSource source = 0;
25
26int main(int argc, char **argv)
27{
28 int i;
29 Parser p;
30 char *s;
31 Entity ent = 0;
32
33 /* Sigh... guess which well-known system doesn't have getopt() */
34
35 for(i = 1; i < argc; i++)
36 {
37 if(argv[i][0] != '-')
38 break;
39 for(s = &argv[i][1]; *s; s++)
40 switch(*s)
41 {
42 case 'v':
43 verbose = 1;
44 break;
45 case 'a':
46 attr_defaults = 1;
47 break;
48 case 'e':
49 expand = 1;
50 break;
51 case 'b':
52 bits = 1;
53 break;
54 case 's':
55 silent = 1;
56 break;
57 case 'n':
58 nsgml = 1;
59 break;
60 case 'c':
61 enc_name = argv[++i];
62 break;
63 case 'm':
64 merge = 1;
65 break;
66 case 't':
67 tree = 1;
68 break;
69 case 'x':
70 strict_xml = 1;
71 attr_defaults = 1;
72 expand = 1;
73 break;
74 default:
76 "usage: rxp [-abemnstvx] [-c encoding] [url]\n");
77 return 1;
78 }
79 }
80
81 if(i < argc)
82 {
83 ent = NewExternalEntity(0, 0, strdup8(argv[i]), 0, 0);
84 if(ent)
85 source = EntityOpen(ent);
86 }
87 else
88 source = SourceFromStream("<stdin>", stdin);
89
90 if(!source)
91 return 1;
92
93 p = NewParser();
94 ParserSetEntityOpener(p, entity_open);
95
96 if(bits)
97 {
98 ParserSetDtdCallback(p, dtd_cb);
99 ParserSetCallbackArg(p, p);
100 }
101
102 if(attr_defaults)
103 ParserSetFlag(p, ReturnDefaultedAttributes, 1);
104
105 if(!expand)
106 {
107 ParserSetFlag(p, ExpandGeneralEntities, 0);
108 ParserSetFlag(p, ExpandCharacterEntities, 0);
109 }
110
111 if(merge)
112 ParserSetFlag(p, MergePCData, 1);
113
114 if(nsgml)
115 {
116 ParserSetFlag(p, XMLPiEnd, 0);
117 ParserSetFlag(p, XMLEmptyTagEnd, 0);
118 ParserSetFlag(p, XMLPredefinedEntities, 0);
119 ParserSetFlag(p, XMLExternalIDs, 0);
120 ParserSetFlag(p, XMLMiscWFErrors, 0);
121 ParserSetFlag(p, TrustSDD, 0);
122 ParserSetFlag(p, ErrorOnUnquotedAttributeValues, 0);
123 ParserSetFlag(p, ExpandGeneralEntities, 0);
124 ParserSetFlag(p, ExpandCharacterEntities, 0);
125/* ParserSetFlag(p, TrimPCData, 1); */
126 }
127
128 if(strict_xml)
129 {
130 ParserSetFlag(p, ErrorOnBadCharacterEntities, 1);
131 ParserSetFlag(p, ErrorOnUndefinedEntities, 1);
132 ParserSetFlag(p, XMLStrictWFErrors, 1);
133 ParserSetFlag(p, WarnOnUndefinedElements, 0);
134 ParserSetFlag(p, WarnOnUndefinedAttributes, 0);
135 ParserSetFlag(p, WarnOnRedefinitions, 0);
136
137 }
138
139 if(ParserPush(p, source) == -1)
140 {
141 ParserPerror(p, &p->xbit);
142 return 1;
143 }
144
145 if(enc_name)
146 {
147 encoding = FindEncoding(enc_name);
148
149 if(encoding == CE_unknown)
150 {
151 fprintf(stderr, "unknown encoding %s\n", enc_name);
152 return 1;
153 }
154 }
155 else if(strict_xml)
156 encoding = CE_UTF_8;
157 else
158 encoding = source->entity->encoding;
159
160 SetFileEncoding(Stdout, encoding);
161
162 if(verbose)
163 fprintf(stderr, "Input encoding %s, output encoding %s\n",
164 CharacterEncodingNameAndByteOrder[source->entity->encoding],
165 CharacterEncodingNameAndByteOrder[encoding]);
166
167 if(!silent && !strict_xml && source->entity->ml_decl == ML_xml && !bits)
168 {
169 Printf("<?xml");
170
171 if(source->entity->version_decl)
172 Printf(" version=\"%s\"", source->entity->version_decl);
173
174 if(encoding == CE_unspecified_ascii_superset)
175 {
176 if(source->entity->encoding_decl != CE_unknown)
177 Printf(" encoding=\"%s\"",
178 CharacterEncodingName[source->entity->encoding_decl]);
179 }
180 else
181 Printf(" encoding=\"%s\"",
182 CharacterEncodingName[encoding]);
183
184 if(source->entity->standalone_decl != SDD_unspecified)
185 Printf(" standalone=\"%s\"",
186 StandaloneDeclarationName[source->entity->standalone_decl]);
187
188 Printf("?>\n");
189 }
190
191 while(1)
192 {
193 XBit bit;
194
195 if(tree)
196 {
197 bit = ReadXTree(p);
198 print_tree(p, bit);
199 }
200 else
201 {
202 bit = ReadXBit(p);
203 print_bit(p, bit);
204 }
205 if(bit->type == XBIT_eof)
206 {
207 if(!silent && !strict_xml && !bits)
208 Printf("\n");
209
210 /* Not necessary, but helps me check for leaks */
211 FreeDtd(p->dtd);
212 FreeParser(p);
213 if(ent)
214 FreeEntity(ent);
215 return 0;
216 }
217 if(bit->type == XBIT_error)
218 return 1;
219 if(tree)
220 FreeXTree(bit);
221 else
222 FreeXBit(bit);
223 }
224}
225
226void print_tree(Parser p, XBit bit)
227{
228 int i;
229 struct xbit endbit;
230
231 print_bit(p, bit);
232 if(bit->type == XBIT_start)
233 {
234 for(i=0; i<bit->nchildren; i++)
235 print_tree(p, bit->children[i]);
236 endbit.type = XBIT_end;
237 endbit.element_definition = bit->element_definition;
238 print_bit(p, &endbit);
239 }
240}
241
242void print_bit(Parser p, XBit bit)
243{
244 const char *sys, *pub;
245
246 if(silent && bit->type != XBIT_error)
247 return;
248
249 if(bits)
250 {
251 Printf("At %d: ", bit->byte_offset);
252 switch(bit->type)
253 {
254 case XBIT_eof:
255 Printf("EOF\n");
256 break;
257 case XBIT_error:
258 ParserPerror(p, bit);
259 break;
260 case XBIT_dtd:
261 sys = pub = "<none>";
262 if(p->dtd->external_part)
263 {
264 if(p->dtd->external_part->publicid)
265 pub = p->dtd->external_part->publicid;
266 if(p->dtd->external_part->systemid)
267 sys = p->dtd->external_part->systemid;
268 }
269 Printf("doctype: %S pubid %s sysid %s\n", p->dtd->name, pub, sys);
270 break;
271 case XBIT_start:
272 Printf("start: %S ", bit->element_definition->name);
273 print_attrs(0, bit->attributes);
274 Printf("\n");
275 break;
276 case XBIT_empty:
277 Printf("empty: %S ", bit->element_definition->name);
278 print_attrs(0, bit->attributes);
279 Printf("\n");
280 break;
281 case XBIT_end:
282 Printf("end: %S\n", bit->element_definition->name);
283 break;
284 case XBIT_pi:
285 Printf("pi: %S: ", bit->pi_name);
286 print_text_bit(bit->pi_chars);
287 Printf("\n");
288 break;
289 case XBIT_cdsect:
290 Printf("cdata: ");
291 print_text_bit(bit->cdsect_chars);
292 Printf("\n");
293 break;
294 case XBIT_pcdata:
295 Printf("pcdata: ");
296 print_text_bit(bit->pcdata_chars);
297 Printf("\n");
298 break;
299 case XBIT_comment:
300 Printf("comment: ");
301 print_text_bit(bit->comment_chars);
302 Printf("\n");
303 break;
304 default:
305 fprintf(stderr, "***%s\n", XBitTypeName[bit->type]);
306 exit(1);
307 break;
308 }
309 }
310 else
311 {
312 switch(bit->type)
313 {
314 case XBIT_eof:
315 break;
316 case XBIT_error:
317 ParserPerror(p, bit);
318 break;
319 case XBIT_dtd:
320 if(strict_xml)
321 /* no doctype in canonical XML */
322 break;
323 Printf("<!DOCTYPE %S", p->dtd->name);
324 if(p->dtd->external_part)
325 {
326 if(p->dtd->external_part->publicid)
327 Printf(" PUBLIC \"%s\"", p->dtd->external_part->publicid);
328 else if(p->dtd->external_part->systemid)
329 Printf(" SYSTEM");
330 if(p->dtd->external_part->systemid)
331 Printf(" \"%s\"", p->dtd->external_part->systemid);
332 }
333 if(p->dtd->internal_part)
334 Printf(" [%S]", p->dtd->internal_part->text);
335 Printf(">\n");
336 break;
337 case XBIT_start:
338 case XBIT_empty:
339 Printf("<%S", bit->element_definition->name);
340 print_attrs(bit->element_definition, bit->attributes);
341 if(bit->type == XBIT_start)
342 Printf(">");
343 else if(strict_xml)
344 Printf("></%S>", bit->element_definition->name);
345 else
346 Printf("/>");
347 break;
348 case XBIT_end:
349 Printf("</%S>", bit->element_definition->name);
350 break;
351 case XBIT_pi:
352 Printf("<?%S %S%s",
353 bit->pi_name, bit->pi_chars, nsgml ? ">" : "?>");
354 if(p->state <= PS_prolog2 && !strict_xml)
355 Printf("\n");
356 break;
357 case XBIT_cdsect:
358 if(strict_xml)
359 /* Print CDATA sections as plain PCDATA in canonical XML */
360 print_text(bit->cdsect_chars);
361 else
362 Printf("<![CDATA[%S]]>", bit->cdsect_chars);
363 break;
364 case XBIT_pcdata:
365 print_text(bit->pcdata_chars);
366 break;
367 case XBIT_comment:
368 if(strict_xml)
369 /* no comments in canonical XML */
370 break;
371 Printf("<!--%S-->", bit->comment_chars);
372 if(p->state <= PS_prolog2)
373 Printf("\n");
374 break;
375 default:
376 fprintf(stderr, "\n***%s\n", XBitTypeName[bit->type]);
377 exit(1);
378 break;
379 }
380 }
381}
382
383int attr_compare(const void *a, const void *b)
384{
385 return Strcmp((*(Attribute *)a)->definition->name,
386 (*(Attribute *)b)->definition->name);
387}
388
389void print_attrs(ElementDefinition e, Attribute a)
390{
391 Attribute b;
392 Attribute *aa;
393 int i, n = 0;
394
395 for(b=a; b; b=b->next)
396 n++;
397
398 if(n == 0)
399 return;
400
401 aa = malloc(n * sizeof(*aa));
402
403 for(i=0, b=a; b; i++, b=b->next)
404 aa[i] = b;
405
406 if(strict_xml)
407 qsort((void *)aa, n, sizeof(*aa), attr_compare);
408
409 for(i=0; i<n; i++)
410 {
411 Printf(" %S=\"", aa[i]->definition->name);
412 print_text(aa[i]->value);
413 Printf("\"");
414 }
415
416 free(aa);
417}
418
419void print_text_bit(Char *text)
420{
421 int i;
422
423 for(i=0; i<50 && text[i]; i++)
424 if(text[i] == '\n' || text[i] == '\r')
425 text[i] = '~';
426 Printf("%.50S", text);
427}
428
429void dtd_cb(XBit bit, void *arg)
430{
431 Printf("In DTD: ");
432 print_bit(arg, bit);
433}
434
435void print_text(Char *text)
436{
437 Char *pc, *last;
438
439 if(bits)
440 {
441 Printf("%S", text);
442 return;
443 }
444
445 for(pc = last = text; *pc; pc++)
446 {
447 if(*pc == '&' || *pc == '<' || *pc == '>' || *pc == '"' ||
448 (strict_xml && (*pc == 9 || *pc == 10 || *pc == 13)))
449 {
450 if(pc > last)
451 Printf("%.*S", pc - last, last);
452 switch(*pc)
453 {
454 case '<':
455 Printf("&lt;");
456 break;
457 case '>':
458 Printf("&gt;");
459 break;
460 case '&':
461 Printf("&amp;");
462 break;
463 case '"':
464 Printf("&quot;");
465 break;
466 case 9:
467 Printf("&#9;");
468 break;
469 case 10:
470 Printf("&#10;");
471 break;
472 case 13:
473 Printf("&#13;");
474 break;
475 }
476 last = pc+1;
477 }
478 }
479
480 if(pc > last)
481 Printf("%.*S", pc - last, last);
482}
483
484InputSource entity_open(Entity ent, void *arg)
485{
486 if(ent->publicid &&
487 strcmp(ent->publicid, "-//RMT//DTD just a test//EN") == 0)
488 {
489 FILE *f;
490 FILE16 *f16;
491
492 if((f = fopen("/tmp/mydtd", "r")))
493 {
494 if(!(f16 = MakeFILE16FromFILE(f, "r")))
495 return 0;
496 SetCloseUnderlying(f16, 1);
497
498 return NewInputSource(ent, f16);
499 }
500 }
501
502 return EntityOpen(ent);
503}
504