43std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
48std::unique_ptr<XmlElement> parseXML (
const File& fileToParse)
55 inputSource.reset (newSource);
60 ignoreEmptyTextElements = shouldBeIgnored;
63namespace XmlIdentifierChars
65 static bool isIdentifierCharSlow (juce_wchar c)
noexcept
68 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
71 static bool isIdentifierChar (juce_wchar c)
noexcept
73 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
75 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
76 : isIdentifierCharSlow (c);
95 while (isIdentifierChar (*p))
104 if (originalText.
isEmpty() && inputSource !=
nullptr)
106 std::unique_ptr<InputStream> in (inputSource->createInputStream());
113 #if JUCE_STRING_UTF_TYPE == 8
117 auto* text =
static_cast<const char*
> (data.
getData());
139 return parseDocumentElement (originalText.
getCharPointer(), onlyReadOuterDocumentElement);
147void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
150 errorOccurred = ! carryOn;
153String XmlDocument::getFileContents (
const String& filename)
const
155 if (inputSource !=
nullptr)
157 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
160 return in->readEntireStreamAsString();
166juce_wchar XmlDocument::readNextChar() noexcept
180 const bool onlyReadOuterDocumentElement)
183 errorOccurred =
false;
185 needToLoadDTD =
true;
187 if (textToParse.isEmpty())
189 lastError =
"not enough input";
191 else if (! parseHeader())
193 lastError =
"malformed header";
195 else if (! parseDTD())
197 lastError =
"malformed DTD";
202 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
205 return result.release();
211bool XmlDocument::parseHeader()
213 skipNextWhiteSpace();
219 if (headerEnd.isEmpty())
223 auto encoding = String (input, headerEnd)
224 .fromFirstOccurrenceOf (
"encoding",
false,
true)
225 .fromFirstOccurrenceOf (
"=",
false,
false)
226 .fromFirstOccurrenceOf (
"\"",
false,
false)
227 .upToFirstOccurrenceOf (
"\"",
false,
false)
237 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
240 input = headerEnd + 2;
241 skipNextWhiteSpace();
247bool XmlDocument::parseDTD()
252 auto dtdStart = input;
254 for (
int n = 1; n > 0;)
256 auto c = readNextChar();
267 dtdText = String (dtdStart, input - 1).
trim();
273void XmlDocument::skipNextWhiteSpace()
292 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
294 if (closeComment < 0)
300 input += closeComment + 3;
307 auto closeBracket = input.
indexOf (CharPointer_ASCII (
"?>"));
309 if (closeBracket < 0)
315 input += closeBracket + 2;
324void XmlDocument::readQuotedString (String& result)
326 auto quote = readNextChar();
330 auto c = readNextChar();
347 auto character = *input;
349 if (character == quote)
351 result.appendCharPointer (start, input);
356 if (character ==
'&')
358 result.appendCharPointer (start, input);
364 setLastError (
"unmatched quotes",
false);
375XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
377 XmlElement* node =
nullptr;
378 skipNextWhiteSpace();
386 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
388 if (endOfToken == input)
391 skipNextWhiteSpace();
392 endOfToken = XmlIdentifierChars::findEndOfToken (input);
394 if (endOfToken == input)
396 setLastError (
"tag name missing",
false);
401 node =
new XmlElement (input, endOfToken);
403 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
408 skipNextWhiteSpace();
412 if (c ==
'/' && input[1] ==
'>')
423 if (alsoParseSubElements)
424 readChildElements (*node);
430 if (XmlIdentifierChars::isIdentifierChar (c))
432 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
434 if (attNameEnd != input)
436 auto attNameStart = input;
438 skipNextWhiteSpace();
440 if (readNextChar() ==
'=')
442 skipNextWhiteSpace();
443 auto nextChar = *input;
445 if (nextChar ==
'"' || nextChar ==
'\'')
447 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
448 readQuotedString (newAtt->value);
449 attributeAppender.append (newAtt);
455 setLastError (
"expected '=' after attribute '"
456 + String (attNameStart, attNameEnd) +
"'",
false);
464 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
474void XmlDocument::readChildElements (XmlElement& parent)
476 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
480 auto preWhitespaceInput = input;
481 skipNextWhiteSpace();
485 setLastError (
"unmatched tags",
false);
496 auto closeTag = input.
indexOf ((juce_wchar)
'>');
499 input += closeTag + 1;
507 auto inputStart = input;
515 setLastError (
"unterminated CDATA section",
false);
520 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
533 if (
auto* n = readNextElement (
true))
534 childAppender.append (n);
541 input = preWhitespaceInput;
542 MemoryOutputStream textElementContent;
543 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
551 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
554 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
556 if (closeComment < 0)
558 setLastError (
"unterminated comment",
false);
563 input += closeComment + 3;
572 setLastError (
"unmatched tags",
false);
582 if (entity.startsWithChar (
'<') && entity [1] != 0)
584 auto oldInput = input;
585 auto oldOutOfData = outOfData;
587 input = entity.getCharPointer();
590 while (
auto* n = readNextElement (
true))
591 childAppender.append (n);
594 outOfData = oldOutOfData;
598 textElementContent << entity;
599 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
606 auto nextChar = *input;
608 if (nextChar ==
'\r')
612 if (input[1] ==
'\n')
616 if (nextChar ==
'<' || nextChar ==
'&')
621 setLastError (
"unmatched tags",
false);
626 textElementContent.appendUTF8Char (nextChar);
632 if (contentShouldBeUsed)
638void XmlDocument::readEntity (String& result)
668 else if (*input ==
'#')
673 if (*input ==
'x' || *input ==
'X')
678 while (input[0] !=
';')
682 if (hexValue < 0 || ++numChars > 8)
684 setLastError (
"illegal escape sequence",
true);
688 charCode = (charCode << 4) | hexValue;
694 else if (input[0] >=
'0' && input[0] <=
'9')
698 while (input[0] !=
';')
702 setLastError (
"illegal escape sequence",
true);
706 charCode = charCode * 10 + ((int) input[0] -
'0');
714 setLastError (
"illegal escape sequence",
true);
719 result << (juce_wchar) charCode;
723 auto entityNameStart = input;
724 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
726 if (closingSemiColon < 0)
733 input += closingSemiColon + 1;
734 result += expandExternalEntity (String (entityNameStart, (
size_t) closingSemiColon));
739String XmlDocument::expandEntity (
const String& ent)
751 if (char1 ==
'x' || char1 ==
'X')
754 if (char1 >=
'0' && char1 <=
'9')
757 setLastError (
"illegal escape sequence",
false);
761 return expandExternalEntity (ent);
764String XmlDocument::expandExternalEntity (
const String& entity)
773 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
774 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
776 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
778 tokenisedDTD.
clear();
779 tokenisedDTD.
addTokens (getFileContents (fn),
true);
783 tokenisedDTD.
clear();
790 if (closeBracket > openBracket)
792 closeBracket),
true);
796 for (
int i = tokenisedDTD.
size(); --i >= 0;)
798 if (tokenisedDTD[i].startsWithChar (
'%')
799 && tokenisedDTD[i].endsWithChar (
';'))
801 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
803 newToks.addTokens (parsed,
true);
807 for (
int j = newToks.size(); --j >= 0;)
808 tokenisedDTD.
insert (i, newToks[j]);
813 needToLoadDTD =
false;
816 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
818 if (tokenisedDTD[i] == entity)
820 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
822 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
825 auto ampersand = ent.indexOfChar (
'&');
827 while (ampersand >= 0)
829 auto semiColon = ent.indexOf (i + 1,
";");
833 setLastError (
"entity without terminating semi-colon",
false);
837 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
839 ent = ent.substring (0, ampersand)
841 + ent.substring (semiColon + 1);
843 ampersand = ent.indexOfChar (semiColon + 1,
'&');
851 setLastError (
"unknown entity",
true);
855String XmlDocument::getParameterEntity (
const String& entity)
857 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
859 if (tokenisedDTD[i] == entity
860 && tokenisedDTD [i - 1] ==
"%"
861 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
863 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
865 if (ent.equalsIgnoreCase (
"system"))
866 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
868 return ent.trim().unquoted();
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate on the data.
int compareIgnoreCaseUpTo(const CharPointer other, const int maxChars) const noexcept
Compares this string with another one, up to a specified number of characters.
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to point to the next character.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
int indexOf(const CharPointer stringToFind) const noexcept
Returns the character index of a substring, or -1 if it isn't found.
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
CharPointer_UTF8 findEndOfWhitespace() const noexcept
Returns a pointer to the first non-whitespace character in the string.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Returns a pointer to the first occurrence of a substring in a string.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.
Represents a local file or directory.
Writes data to an internal memory buffer, which grows as required.
const void * getData() const noexcept
Returns a pointer to the data that has been written to the stream.
String toString() const
Attempts to detect the encoding of the data and convert it to a string.
size_t getDataSize() const noexcept
Returns the number of bytes of data that have been written to the stream.
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
Reads data from an input stream and writes it to this stream.
virtual bool writeByte(char byte)
Writes a single byte to the stream.
void insert(int index, String stringToAdd)
Inserts a string into the array.
void clear()
Removes all elements from the array.
int size() const noexcept
Returns the number of strings in the array.
void trim()
Deletes any whitespace characters from the starts and ends of all the strings.
void remove(int index)
Removes a string from the array.
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
Breaks up a string into tokens and adds them to this array.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
int indexOfChar(juce_wchar characterToLookFor) const noexcept
Searches for a character inside this string.
String trim() const
Returns a copy of this string with any whitespace characters removed from the start and end.
bool isEmpty() const noexcept
Returns true if the string contains no characters.
void clear() noexcept
Resets this string to be empty.
int lastIndexOfChar(juce_wchar character) const noexcept
Searches for a character inside this string (working backwards from the end of the string).
String trimCharactersAtEnd(StringRef charactersToTrim) const
Returns a copy of this string, having removed a specified set of characters from its end.
static String charToString(juce_wchar character)
Creates a string from a single character.
String substring(int startIndex, int endIndex) const
Returns a subsection of the string.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
bool isNotEmpty() const noexcept
Returns true if the string contains at least one character.
Parses a text-based XML document and creates an XmlElement object from it.
const String & getLastParseError() const noexcept
Returns the parsing error that occurred the last time getDocumentElement was called.
static XmlElement * parse(const File &file)
A handy static method that parses a file.
XmlDocument(const String &documentText)
Creates an XmlDocument from the xml text.
void setInputSource(InputSource *newSource) noexcept
Sets an input source object to use for parsing documents that reference external entities.
XmlElement * getDocumentElement(bool onlyReadOuterDocumentElement=false)
Creates an XmlElement object to represent the main document node.
~XmlDocument()
Destructor.
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
Sets a flag to change the treatment of empty text elements.
Used to build a tree of elements representing an XML document.
static XmlElement * createTextElement(const String &text)
Creates a text element that can be added to a parent element.