Skip to content

Commit 53a11be

Browse files
committed
Allow an XML fragment to contain an XML declaration. For that, we need a small
hand-crafted parser for the XML declaration, because libxml doesn't seem to allow this.
1 parent 324297d commit 53a11be

File tree

1 file changed

+127
-7
lines changed
  • src/backend/utils/adt

1 file changed

+127
-7
lines changed

src/backend/utils/adt/xml.c

Lines changed: 127 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -489,6 +489,122 @@ xml_init(void)
489489
}
490490

491491

492+
/*
493+
* SQL/XML allows storing "XML documents" or "XML content". "XML
494+
* documents" are specified by the XML specification and are parsed
495+
* easily by libxml. "XML content" is specified by SQL/XML as the
496+
* production "XMLDecl? content". But libxml can only parse the
497+
* "content" part, so we have to parse the XML declaration ourselves
498+
* to complete this.
499+
*/
500+
501+
/*
 * Return XML_ERR_SPACE_REQUIRED from the enclosing function unless
 * xmlIsBlank_ch() accepts the character at *p.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as one
 * statement; a bare "if" here would capture an "else" written after the
 * macro call at the use site.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)
502+
/*
 * Advance p past any run of characters accepted by xmlIsBlank_ch().
 * p is modified in place and must therefore be an lvalue.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)
503+
504+
static int
505+
parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
506+
{
507+
const xmlChar *p;
508+
const xmlChar *save_p;
509+
510+
p = str;
511+
512+
if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
513+
goto finished;
514+
515+
p += 5;
516+
517+
/* version */
518+
CHECK_XML_SPACE(p);
519+
SKIP_XML_SPACE(p);
520+
if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
521+
return XML_ERR_VERSION_MISSING;
522+
p += 7;
523+
SKIP_XML_SPACE(p);
524+
if (*p != '=')
525+
return XML_ERR_VERSION_MISSING;
526+
p += 1;
527+
SKIP_XML_SPACE(p);
528+
if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
529+
return XML_ERR_VERSION_MISSING;
530+
p += 5;
531+
532+
/* encoding */
533+
save_p = p;
534+
SKIP_XML_SPACE(p);
535+
if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
536+
{
537+
CHECK_XML_SPACE(save_p);
538+
p += 8;
539+
SKIP_XML_SPACE(p);
540+
if (*p != '=')
541+
return XML_ERR_MISSING_ENCODING;
542+
p += 1;
543+
SKIP_XML_SPACE(p);
544+
545+
if (*p == '\'' || *p == '"')
546+
{
547+
const xmlChar *q;
548+
549+
q = xmlStrchr(p + 1, *p);
550+
if (!q)
551+
return XML_ERR_MISSING_ENCODING;
552+
553+
*encoding = xmlStrndup(p + 1, q - p - 1);
554+
p = q + 1;
555+
}
556+
else
557+
return XML_ERR_MISSING_ENCODING;
558+
}
559+
else
560+
{
561+
p = save_p;
562+
*encoding = NULL;
563+
}
564+
565+
/* standalone */
566+
save_p = p;
567+
SKIP_XML_SPACE(p);
568+
if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
569+
{
570+
CHECK_XML_SPACE(save_p);
571+
p += 10;
572+
SKIP_XML_SPACE(p);
573+
if (*p != '=')
574+
return XML_ERR_STANDALONE_VALUE;
575+
p += 1;
576+
SKIP_XML_SPACE(p);
577+
if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
578+
{
579+
*standalone = 1;
580+
p += 5;
581+
}
582+
else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
583+
{
584+
*standalone = 0;
585+
p += 4;
586+
}
587+
else
588+
return XML_ERR_STANDALONE_VALUE;
589+
}
590+
else
591+
{
592+
p = save_p;
593+
*standalone = -1;
594+
}
595+
596+
SKIP_XML_SPACE(p);
597+
if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
598+
return XML_ERR_XMLDECL_NOT_FINISHED;
599+
p += 2;
600+
601+
finished:
602+
if (len)
603+
*len = (p - str);
604+
return XML_ERR_OK;
605+
}
606+
607+
492608
/*
493609
* Convert a C string to XML internal representation
494610
*
@@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
536652
}
537653
else
538654
{
655+
size_t count;
656+
xmlChar *encoding = NULL;
657+
int standalone = -1;
658+
539659
doc = xmlNewDoc(NULL);
540660

541-
/*
542-
* FIXME: An XMLDecl is supposed to be accepted before the
543-
* content, but libxml doesn't allow this. Parse that
544-
* ourselves?
545-
*/
661+
res_code = parse_xml_decl(string, &count, &encoding, &standalone);
546662

547663
/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
548-
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL);
664+
if (res_code == 0)
665+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
549666
if (res_code != 0)
550667
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
551668
"invalid XML content", res_code);
669+
670+
doc->encoding = encoding;
671+
doc->standalone = standalone;
552672
}
553673

554674
/* TODO encoding issues

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy