26 #include "lt-memory.h" 29 #define ERR(m) LT_ERROR(NECHAR,m) 30 #define ERR1(m,x) LT_ERROR1(NECHAR,m,x) 31 #define ERR2(m,x,y) LT_ERROR2(NECHAR,m,x,y) 32 #define ERR3(m,x,y,z) LT_ERROR3(NECHAR,m,x,y,z) 35 #define Realloc srealloc 41 #define ERR(m) fprintf(stderr,m) 42 #define ERR1(m,x) fprintf(stderr,m,x) 43 #define ERR2(m,x,y) fprintf(stderr,m,x,y) 44 #define ERR3(m,x,y,z) fprintf(stderr,m,x,y,z) 55 static int get_translated_line1(InputSource s);
/* Fragment (enclosing function header is not visible here).
 * Tries to open `url` for reading: url_open is called with a zero base,
 * mode "r", and a zero merged-URL out-pointer, and the result is kept in
 * f16.  The condition is true when `url` is null or the open yields a
 * null stream; the action taken in that case is not visible. */
76 if(!url || !(f16 =
url_open(url, 0,
"r", 0)))
/* Fragment (enclosing function header is not visible here).
 * Allocates one input-source record and resets its bookkeeping fields.
 * The branch taken when Malloc returns null is not visible. */
92 if(!(source =
Malloc(
sizeof(*source))))
/* No line buffer is allocated yet and the current line is empty. */
96 source->line_alloc = 0;
97 source->line_length = 0;
/* Attach the stream held in f16 to the new record. */
103 source->file16 = f16;
/* Byte counters start at zero. */
105 source->bytes_consumed = 0;
106 source->bytes_before_current_line = 0;
/* No pending carriage return from a previous line. */
107 source->line_end_was_cr = 0;
108 source->line_number = 0;
/* Flag set to 1 here; presumably means no line has been fetched yet
 * (its consumer is not visible in this fragment). */
109 source->not_read_yet = 1;
/* Read cursor and fill level of the input buffer both start at zero. */
111 source->nextin = source->insize = 0;
/* Fragment (enclosing function header and several branches are not
 * visible).  Reports a position through the out-parameters *linenum and
 * *charnum, using offsets stored on the source's entity `e` and on that
 * entity's parent `f`. */
120 Entity e = s->entity,
f = e->parent;
/* One path reports the source's own line number unchanged. */
124 *linenum = s->line_number;
/* When the entity's text matches its parent's text, shift the line number
 * by the entity's line offset.  The entity's first-line character offset
 * is applied only while still on line 0 of the source; the remaining term
 * of the *charnum sum is not visible here. */
131 if(e->matches_parent_text)
133 *linenum = e->line_offset + s->line_number;
134 *charnum = (s->line_number == 0 ? e->line1_char_offset : 0) +
/* Another path reports the entity's own stored offsets. */
140 *linenum = e->line_offset;
141 *charnum = e->line1_char_offset;
/* If there is a parent and its text matches its own parent's text, combine
 * the parent's offsets with the entity's.  The parent's first-line
 * character offset is applied only when the entity's line offset is 0. */
146 if(
f &&
f->matches_parent_text)
148 *linenum =
f->line_offset + e->line_offset;
149 *charnum = (e->line_offset == 0 ?
f->line1_char_offset : 0) +
150 e->line1_char_offset;
/* Fragment (enclosing function header, case labels and branch bodies are
 * not visible).  Every visible return computes a byte position as
 * bytes_before_current_line plus the bytes taken by the first s->next
 * characters of the current line. */
/* One byte per character. */
166 return s->bytes_before_current_line + s->next;
/* The per-character byte width depends on the entity's encoding. */
168 switch(s->entity->encoding)
/* Two bytes per character. */
174 return s->bytes_before_current_line + 2 * s->next;
185 return s->bytes_before_current_line + s->next;
/* If the line was flagged as complicated UTF-8, walk the characters consumed
 * so far and classify each by range; the result `n` is added below.  The
 * amount added per range is not visible in this fragment. */
187 if(s->complicated_utf8_line)
191 for(i = 0; i < s->next; i++)
/* Values in the UTF-16 surrogate range get their own branch. */
198 else if(c >= 0xd800 && c <= 0xdfff)
/* NOTE(review): if these thresholds are UTF-8 length limits, the 4-byte
 * limit is 0x1fffff rather than 0x1ffff -- confirm against the full file. */
203 else if(c <= 0x1ffff)
205 else if(c <= 0x3ffffff)
211 return s->bytes_before_current_line + n;
/* Otherwise one byte per character. */
214 return s->bytes_before_current_line + s->next;
/* Fragment (enclosing function header is not visible here).
 * Sets both byte counters to `offset` and empties the input buffer by
 * zeroing its read cursor and fill level. */
226 s->bytes_consumed = s->bytes_before_current_line = offset;
227 s->nextin = s->insize = 0;
/* Large negative sentinel; presumably marks the line number as unknown
 * after repositioning -- TODO confirm against callers. */
229 s->line_number = -999999;
/* Fetches the next line of the source into s->line / s->line_length.
 * Only part of the body is visible.  One path delegates to
 * get_translated_line1(); the other treats f16->handle as a buffer of
 * Char and f16->handle2 as a byte offset into it, and points s->line
 * directly at that buffer.  The conditions selecting each path and the
 * return values of the second path are not visible. */
233 static int get_translated_line(InputSource s)
239 int handle2, handle3;
243 Entity e = s->entity;
245 struct _FILE16 *f16 = (
struct _FILE16 *)s->file16;
/* Delegate to the decoding implementation. */
249 return get_translated_line1(s);
/* True when the Char at the current byte offset is zero (the terminator). */
251 if(!*(Char *)((
char *)f16->handle + f16->handle2))
/* The line starts at the current byte offset inside the handle buffer. */
257 s->line = (Char *)((
char *)f16->handle + f16->handle2);
/* Scan forward to the terminator or the next newline. */
258 for(p=s->line; *p && *p !=
'\n'; p++)
/* Store the scan position back as a byte offset from the buffer start. */
262 f16->handle2 = (
char *)p - (
char *)f16->handle;
/* Length in Chars from the line start to the scan position. */
263 s->line_length = p - s->line;
265 s->bytes_before_current_line = f16->handle2;
/* Decodes bytes from the source's input buffer into Chars in s->line,
 * refilling the buffer with Readu() as needed.  Only part of the body is
 * visible: case labels, loop structure and return statements are elided,
 * so the comments below describe individual visible statements only. */
270 static int get_translated_line1(InputSource s)
/* Work on local copies of the buffer state; they are written back to `s`
 * at the commit points further down. */
273 unsigned char *inbuf = s->inbuf;
274 int nextin = s->nextin, insize = s->insize;
/* Read-cursor value at entry; used to compute how many bytes were used. */
275 int startin = s->nextin;
276 Char *outbuf = s->line;
277 int outsize = s->line_alloc;
/* A line feed right after a carriage return that ended the previous line
 * is to be ignored (see the check on '\n' below). */
280 int ignore_linefeed = s->line_end_was_cr;
287 s->complicated_utf8_line = 0;
294 s->line_end_was_cr = 0;
/* The new line begins at the current consumed-byte count. */
295 s->bytes_before_current_line = s->bytes_consumed;
/* Grow the output buffer to hold what is already written plus one Char
 * for every byte still buffered.
 * NOTE(review): no check of the Realloc result is visible here -- confirm. */
300 if(outsize < nextout + (insize - nextin))
302 outsize = nextout + (insize - nextin);
303 outbuf =
Realloc(outbuf, outsize *
sizeof(Char));
/* Consume buffered bytes. */
306 while(nextin < insize)
/* 16-bit unit, first byte is the high byte; needs two buffered bytes. */
315 if(nextin+2 > insize)
317 c = (inbuf[nextin] << 8) + inbuf[nextin+1];
/* 16-bit unit, first byte is the low byte; needs two buffered bytes. */
322 if(nextin+2 > insize)
324 c = (inbuf[nextin+1] << 8) + inbuf[nextin];
/* Single-byte encoding: map the byte through the to_unicode table.  A table
 * value of -1 is reported as illegal; the offset printed is that of the
 * byte just consumed (hence the "- 1"). */
339 c = to_unicode[inbuf[nextin++]];
340 if(c == (
unsigned int)-1)
341 ERR3(
"Illegal %s character <0x%x> " 342 "at file offset %d\n",
344 s->bytes_consumed + nextin - 1 - startin);
/* Rejects lead-byte values of 0xc0 and below, and 0xfe and above. */
350 if(c <= 0xc0 || c >= 0xfe)
352 ERR2(
"Illegal UTF-8 start byte <0x%x> " 353 "at file offset %d\n",
354 c, s->bytes_consumed + nextin - 1 - startin);
/* True when the `more` following bytes are not all in the buffer yet; the
 * action taken is not visible. */
382 if(nextin+more > insize)
/* Record that this line contains a multi-byte sequence. */
387 s->complicated_utf8_line = 1;
/* Fold in the low six bits of each of the `more` following bytes. */
388 for(i=0; i<more; i++)
389 c = (c << 6) + (inbuf[nextin++] & 0x3f);
392 ERR(
"read from entity with unsupported encoding!\n");
/* Tail of a validity condition: true for values outside 0xd800..0xdfff. */
398 c < 0xd800 || c > 0xdfff)
/* Here nextin already points past the bad character, so the message says
 * "immediately before". */
402 ERR2(
"Error: illegal character <0x%x> " 403 "immediately before file offset %d\n",
404 c, s->bytes_consumed + nextin - startin);
/* A line feed following a carriage-return line end: the bytes used so far
 * are added to the line-start offset. */
408 if(c ==
'\n' && ignore_linefeed)
412 s->bytes_before_current_line += (nextin - startin);
/* Remember that this line ended with a carriage return. */
419 s->line_end_was_cr = 1;
/* Store a value above 0xffff as a UTF-16 surrogate pair: high surrogate
 * from the upper bits, low surrogate from the lower ten bits. */
427 outbuf[nextout++] = ((c - 0x10000) >> 10) + 0xd800;
428 outbuf[nextout++] = ((c - 0x10000) & 0x3ff) + 0xdc00;
/* Store the value as a single Char. */
431 outbuf[nextout++] = c;
433 outbuf[nextout++] = c;
/* Commit point: account for the bytes used and publish the line buffer
 * size and length. */
440 s->bytes_consumed += (nextin - startin);
442 s->line_alloc = outsize;
443 s->line_length = nextout;
/* Move the unconsumed tail of the input buffer to its start. */
453 remaining = insize - nextin;
454 for(i=0; i<remaining; i++)
455 inbuf[i] = inbuf[nextin + i];
460 s->bytes_consumed += (nextin - startin);
/* Refill behind the kept tail: the destination starts `remaining` bytes in
 * (insize - nextin equals remaining) and at most the rest of s->inbuf is
 * requested.  How the Readu result is checked is not visible. */
462 insize =
Readu(s->file16,
463 inbuf+insize-nextin,
sizeof(s->inbuf)-remaining);
/* Restart reading from the front of the buffer. */
464 nextin = startin = 0;
/* Commit point: publish the line buffer size and length. */
471 s->line_alloc = outsize;
472 s->line_length = nextout;
/* Fragment (enclosing function header and several branch bodies are not
 * visible).  Inspects the first four bytes of the input buffer and, in the
 * visible assignments, sets the entity's encoding from them. */
482 Entity e = s->entity;
484 unsigned char *b = (
unsigned char *)s->inbuf;
/* Zero the four bytes so that positions not filled by the read below
 * compare as 0. */
486 b[0] = b[1] = b[2] = b[3] = 0;
/* Read after the bytes already buffered, up to four bytes in total. */
490 nread =
Readu(s->file16, s->inbuf + s->insize, 4 - s->insize);
/* '<' as a four-byte unit with the significant byte last. */
499 if(b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] ==
'<')
500 e->encoding = CE_ISO_10646_UCS_4B;
/* '<' as a four-byte unit with the significant byte first. */
501 else if(b[0] ==
'<' && b[1] == 0 && b[2] == 0 && b[3] == 0)
502 e->encoding = CE_ISO_10646_UCS_4L;
/* The remaining tests look for two-byte patterns; the encoding each one
 * selects is not visible in this fragment. */
/* Bytes 0xfe 0xff at the start. */
505 if(b[0] == 0xfe && b[1] == 0xff)
/* "<?" as two-byte units with the zero byte first. */
510 else if(b[0] == 0 && b[1] ==
'<' && b[2] == 0 && b[3] ==
'?')
/* Bytes 0xff 0xfe at the start. */
512 else if(b[0] == 0xff && b[1] == 0xfe)
/* "<?" as two-byte units with the zero byte last. */
517 else if(b[0] ==
'<' && b[1] == 0 && b[2] ==
'?' && b[3] == 0)
/* Fragment (enclosing function header and several statements are not
 * visible).  Fetches a new line through get_translated_line() and returns
 * the next character of s->line. */
/* Must not be called once end-of-entity has been seen. */
531 assert(!s->seen_eoe);
/* A non-zero result from get_translated_line() is reported as an I/O error. */
533 if(get_translated_line(s) != 0)
537 ERR1(
"I/O error on stream <%s>, ignore further errors\n",
/* Setting the length to the current position leaves no unread characters
 * on the line. */
541 s->line_length = s->next;
/* An empty line was fetched; the surrounding handling is not visible. */
546 if(s->line_length == 0)
549 s->line_length = s->next;
/* Return the character at the cursor and advance the cursor past it. */
561 return s->line[s->next++];
XML_API void EntitySetBaseURL(Entity e, const char8 *url)
STD_API const char8 * CharacterEncodingName[CE_enum_count]
STD_API FILE16 * MakeFILE16FromFILE(FILE *f, const char *type)
STD_API char8 *EXPRT default_base_url(void)
XML_API const char8 * EntityDescription(Entity e)
#define NewExternalEntity(name, pub, sys, nnot, parent)
STD_API int Readu(FILE16 *file, unsigned char *buf, int max_count)
STD_API int Fseek(FILE16 *file, long offset, int ptrname)
enum character_encoding CharacterEncoding
STD_API FILE16 * url_open(const char8 *url, const char8 *base, const char8 *type, char8 **merged_url)
XML_API const char8 * EntityURL(Entity e)
void * Realloc(void *mem, int bytes)
STD_API int iso_to_unicode[8][256]
STD_API FILE16 * MakeFILE16FromString(void *buf, long size, const char *type)