{-# LANGUAGE OverloadedStrings #-}

module Text.XmlHtml.XML.Parse where

import           Control.Applicative
import           Control.Monad
import           Data.Char
import           Data.List
import           Data.Maybe
import           Text.XmlHtml.Common
import           Text.XmlHtml.TextParser

import qualified Text.Parsec as P

import           Data.Map (Map)
import qualified Data.Map as M

import           Data.Text (Text)
import qualified Data.Text as T


------------------------------------------------------------------------------
-- | This is my best guess as to the best rule for handling document fragments
-- for processing.  It is essentially modeled after document, but allowing
-- multiple nodes.
docFragment :: Encoding -> Parser Document
docFragment :: Encoding -> Parser Document
docFragment Encoding
e = do
    (Maybe DocType
dt, [Node]
nodes1) <- Parser (Maybe DocType, [Node])
prolog
    [Node]
nodes2       <- Parser [Node]
content
    Document -> Parser Document
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Document -> Parser Document) -> Document -> Parser Document
forall a b. (a -> b) -> a -> b
$ Encoding -> Maybe DocType -> [Node] -> Document
XmlDocument Encoding
e Maybe DocType
dt ([Node]
nodes1 [Node] -> [Node] -> [Node]
forall a. [a] -> [a] -> [a]
++ [Node]
nodes2)


------------------------------------------------------------------------------
-- Everything from here forward is translated from the XML specification.   --
------------------------------------------------------------------------------

{-
    Map from numbered productions in the XML specification to symbols here:

    PROD  SPEC NAME          PARSER NAME           NOTES
    -----|------------------|---------------------|-------
    [1]   document           document
    [2]   Char                                     {2}
    [3]   S                  whiteSpace
    [4]   NameStartChar      isNameStartChar       {1}
    [4a]  NameChar           isNameChar            {1}
    [5]   Name               name
    [6]   Names              names
    [7]   Nmtoken            nmtoken
    [8]   Nmtokens           nmtokens
    [9]   EntityValue                              {4}
    [10]  AttValue           attrValue
    [11]  SystemLiteral      systemLiteral
    [12]  PubidLiteral       pubIdLiteral
    [13]  PubidChar          isPubIdChar           {1}
    [14]  CharData           charData
    [15]  Comment            comment
    [16]  PI                 processingInstruction
    [17]  PITarget           piTarget
    [18]  CDSect             cdSect
    [19]  CDStart            cdSect                {3}
    [20]  CData              cdSect                {3}
    [21]  CDEnd              cdSect                {3}
    [22]  prolog             prolog
    [23]  XMLDecl            xmlDecl
    [24]  VersionInfo        versionInfo
    [25]  Eq                 eq
    [26]  VersionNum         versionInfo           {3}
    [27]  Misc               misc
    [28]  doctypedecl        docTypeDecl
    [28a] DeclSep                                  {4}
    [28b] intSubset                                {4}
    [29]  markupdecl                               {4}
    [30]  extSubset                                {4}
    [31]  extSubsetDecl                            {4}
    [32]  SDDecl             sdDecl
    [39]  element            element
    [40]  STag               emptyOrStartTag
    [41]  Attribute          attribute
    [42]  ETag               endTag
    [43]  content            content
    [44]  EmptyElemTag       emptyOrStartTag
    [45]  elementDecl                              {4}
    [46]  contentSpec                              {4}
    [47]  children                                 {4}
    [48]  cp                                       {4}
    [49]  choice                                   {4}
    [50]  seq                                      {4}
    [51]  Mixed                                    {4}
    [52]  AttlistDecl                              {4}
    [53]  AttDef                                   {4}
    [54]  AttType                                  {4}
    [55]  StringType                               {4}
    [56]  TokenizedType                            {4}
    [57]  EnumeratedType                           {4}
    [58]  NotationType                             {4}
    [59]  Enumeration                              {4}
    [60]  DefaultDecl                              {4}
    [61]  conditionalSect                          {4}
    [62]  includeSect                              {4}
    [63]  ignoreSect                               {4}
    [64]  ignoreSectContents                       {4}
    [65]  Ignore                                   {4}
    [66]  CharRef            charRef
    [67]  Reference          reference
    [68]  EntityRef          entityRef
    [69]  PEReference                              {4}
    [70]  EntityDecl                               {4}
    [71]  GEDecl                                   {4}
    [72]  PEDecl                                   {4}
    [73]  EntityDef                                {4}
    [74]  PEDef                                    {4}
    [75]  ExternalID         externalID
    [76]  NDataDecl                                {4}
    [77]  TextDecl           textDecl
    [78]  extParsedEnt       extParsedEnt
    [80]  EncodingDecl       encodingDecl
    [81]  EncName            encodingDecl          {3}
    [82]  NotationDecl                             {4}
    [83]  PublicID                                 {4}
    [84]  Letter                                   {5}
    [85]  BaseChar                                 {5}
    [86]  Ideographic                              {5}
    [87]  CombiningChar                            {5}
    [88]  Digit                                    {5}
    [89]  Extender                                 {5}

    Notes:
        {1} - These productions match single characters, and so are
              implemented as predicates instead of parsers.
        {3} - Denotes a production which is not exposed as a top-level symbol
              because it is trivial and included in another definition.
        {4} - This module does not contain a parser for the DTD subsets, so
              grammar that occurs only in DTD subsets is not defined.
        {5} - These are orphaned productions for character classes.
-}


------------------------------------------------------------------------------
whiteSpace :: Parser ()
whiteSpace :: Parser ()
whiteSpace = ParsecT Text () Identity Char -> ParsecT Text () Identity String
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
' ',Char
'\t',Char
'\r',Char
'\n'])) ParsecT Text () Identity String -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parser ()
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()


------------------------------------------------------------------------------
isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar Char
c | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':'                         = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'                         = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a'       Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z'       = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A'       Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z'       = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xc0'    Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xd6'    = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xd8'    Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xf6'    = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xf8'    Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2ff'   = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x370'   Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37d'   = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x37f'   Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1fff'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200c'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200d'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218f'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2c00'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2fef'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xd7ff'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xf900'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xfdcf'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xfdf0'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xfffd'  = Bool
True
                  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xeffff' = Bool
True
                  | Bool
otherwise                        = Bool
False


------------------------------------------------------------------------------
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c | Char -> Bool
isNameStartChar Char
c                = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-'                         = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.'                         = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xb7'                      = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0'       Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'       = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x300'   Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x36f'   = Bool
True
             | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x203f'  Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040'  = Bool
True
             | Bool
otherwise                        = Bool
False


------------------------------------------------------------------------------
name :: Parser Text
name :: Parser Text
name = do
    Char
c <- (Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isNameStartChar
    Text
r <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isNameChar
    Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
T.cons Char
c Text
r


------------------------------------------------------------------------------
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = ([Text] -> Text) -> ParsecT Text () Identity [Text] -> Parser Text
forall a b.
(a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat (ParsecT Text () Identity [Text]
singleQuoted ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity [Text]
doubleQuoted)
  where
    singleQuoted :: ParsecT Text () Identity [Text]
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' ParsecT Text () Identity Char
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> String -> ParsecT Text () Identity [Text]
forall {t :: * -> *}.
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill [Char
'<',Char
'&',Char
'\''] ParsecT Text () Identity [Text]
-> ParsecT Text () Identity Char -> ParsecT Text () Identity [Text]
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
    doubleQuoted :: ParsecT Text () Identity [Text]
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"'  ParsecT Text () Identity Char
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> String -> ParsecT Text () Identity [Text]
forall {t :: * -> *}.
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill [Char
'<',Char
'&',Char
'"']  ParsecT Text () Identity [Text]
-> ParsecT Text () Identity Char -> ParsecT Text () Identity [Text]
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"'
    refTill :: t Char -> ParsecT Text () Identity [Text]
refTill t Char
end = Parser Text -> ParsecT Text () Identity [Text]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> t Char -> Bool
forall a. Eq a => a -> t a -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
end)) Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
reference)


------------------------------------------------------------------------------
systemLiteral :: Parser Text
systemLiteral :: Parser Text
systemLiteral = Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
  where
    singleQuoted :: Parser Text
singleQuoted = do
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
        Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''))
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
    doubleQuoted :: Parser Text
doubleQuoted = do
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
        Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\"'))
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x


------------------------------------------------------------------------------
pubIdLiteral :: Parser Text
pubIdLiteral :: Parser Text
pubIdLiteral = Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
  where
    singleQuoted :: Parser Text
singleQuoted = do
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
        Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (\Char
c -> Char -> Bool
isPubIdChar Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'')
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
    doubleQuoted :: Parser Text
doubleQuoted = do
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
        Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isPubIdChar
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x


------------------------------------------------------------------------------
isPubIdChar :: Char -> Bool
isPubIdChar :: Char -> Bool
isPubIdChar Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z'                 = Bool
True
              | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z'                 = Bool
True
              | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'                 = Bool
True
              | Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
otherChars                  = Bool
True
              | Bool
otherwise                            = Bool
False
  where
    otherChars :: String
otherChars = String
" \r\n-\'()+,./:=?;!*#@$_%" :: [Char]

------------------------------------------------------------------------------
-- | The requirement to not contain "]]>" is for SGML compatibility.  We
-- deliberately choose to not enforce it.  This makes the parser accept
-- strictly more documents than a standards-compliant parser.
charData :: Parser Node
charData :: Parser Node
charData = Text -> Node
TextNode (Text -> Node) -> Parser Text -> Parser Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'<',Char
'&']))


------------------------------------------------------------------------------
comment :: Parser (Maybe Node)
comment :: Parser (Maybe Node)
comment = Text -> Parser Text
text Text
"<!--" Parser Text -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> (Text -> Node) -> Text -> Maybe Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
Comment (Text -> Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
commentText) Parser (Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Text -> Parser Text
text Text
"-->"
  where
    commentText :: Parser Text
commentText = ([Text] -> Text) -> ParsecT Text () Identity [Text] -> Parser Text
forall a b.
(a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat (ParsecT Text () Identity [Text] -> Parser Text)
-> ParsecT Text () Identity [Text] -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser Text -> ParsecT Text () Identity [Text]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Text -> ParsecT Text () Identity [Text])
-> Parser Text -> ParsecT Text () Identity [Text]
forall a b. (a -> b) -> a -> b
$
        Parser Text
nonDash Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Char -> Text -> Text
T.cons (Char -> Text -> Text)
-> ParsecT Text () Identity Char
-> ParsecT Text () Identity (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-' ParsecT Text () Identity (Text -> Text)
-> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity (a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Text
nonDash)
    nonDash :: Parser Text
nonDash = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-'))


------------------------------------------------------------------------------
-- | Always returns Nothing since there's no representation for a PI in the
-- document tree.
processingInstruction :: Parser (Maybe Node)
processingInstruction :: Parser (Maybe Node)
processingInstruction = do
    Text
_ <- Text -> Parser Text
text Text
"<?"
    ()
_ <- Parser ()
piTarget
    String
_ <- ParsecT Text () Identity String
forall {u}. ParsecT Text u Identity String
emptyEnd ParsecT Text () Identity String
-> ParsecT Text () Identity String
-> ParsecT Text () Identity String
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity String
contentEnd
    Maybe Node -> Parser (Maybe Node)
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Node
forall a. Maybe a
Nothing
  where
    emptyEnd :: ParsecT Text u Identity String
emptyEnd   = ParsecT Text u Identity String -> ParsecT Text u Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (String -> ParsecT Text u Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
P.string String
"?>")
    contentEnd :: ParsecT Text () Identity String
contentEnd = ParsecT Text () Identity String -> ParsecT Text () Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text () Identity String
 -> ParsecT Text () Identity String)
-> ParsecT Text () Identity String
-> ParsecT Text () Identity String
forall a b. (a -> b) -> a -> b
$ do
        ()
_ <- Parser ()
whiteSpace
        ParsecT Text () Identity Char
-> Parser Text -> ParsecT Text () Identity String
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill ParsecT Text () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar (Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Text -> Parser Text
text Text
"?>")

------------------------------------------------------------------------------
piTarget :: Parser ()
piTarget :: Parser ()
piTarget = do
    Text
n <- Parser Text
name
    Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Char -> Char) -> Text -> Text
T.map Char -> Char
toLower Text
n Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"xml") (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ String -> Parser ()
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"xml declaration can't occur here"


------------------------------------------------------------------------------
cdata :: [Char] -> Parser a -> Parser Node
cdata :: forall a. String -> Parser a -> Parser Node
cdata String
cs Parser a
end = Text -> Node
TextNode (Text -> Node) -> ([Text] -> Text) -> [Text] -> Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> Text
T.concat ([Text] -> Node) -> ParsecT Text () Identity [Text] -> Parser Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text -> Parser a -> ParsecT Text () Identity [Text]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill Parser Text
part Parser a
end
  where part :: Parser Text
part = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
cs))
             Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Text
T.singleton (Char -> Text) -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Text () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar


------------------------------------------------------------------------------
cdSect :: Parser (Maybe Node)
cdSect :: Parser (Maybe Node)
cdSect = Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> do
    Text
_ <- Text -> Parser Text
text Text
"<![CDATA["
    String -> Parser Text -> Parser Node
forall a. String -> Parser a -> Parser Node
cdata String
"]" (Text -> Parser Text
text Text
"]]>")


------------------------------------------------------------------------------
prolog :: Parser (Maybe DocType, [Node])
prolog :: Parser (Maybe DocType, [Node])
prolog = do
    Maybe (Maybe Text)
_      <- ParsecT Text () Identity (Maybe Text)
-> ParsecT Text () Identity (Maybe (Maybe Text))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity (Maybe Text)
xmlDecl
    [Maybe Node]
nodes1 <- Parser (Maybe Node) -> ParsecT Text () Identity [Maybe Node]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
    Maybe (DocType, [Maybe Node])
rest   <- ParsecT Text () Identity (DocType, [Maybe Node])
-> ParsecT Text () Identity (Maybe (DocType, [Maybe Node]))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Text () Identity (DocType, [Maybe Node])
 -> ParsecT Text () Identity (Maybe (DocType, [Maybe Node])))
-> ParsecT Text () Identity (DocType, [Maybe Node])
-> ParsecT Text () Identity (Maybe (DocType, [Maybe Node]))
forall a b. (a -> b) -> a -> b
$ do
        DocType
dt     <- Parser DocType
docTypeDecl
        [Maybe Node]
nodes2 <- Parser (Maybe Node) -> ParsecT Text () Identity [Maybe Node]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
        (DocType, [Maybe Node])
-> ParsecT Text () Identity (DocType, [Maybe Node])
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType
dt, [Maybe Node]
nodes2)
    case Maybe (DocType, [Maybe Node])
rest of
        Maybe (DocType, [Maybe Node])
Nothing           -> (Maybe DocType, [Node]) -> Parser (Maybe DocType, [Node])
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe DocType
forall a. Maybe a
Nothing, [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes [Maybe Node]
nodes1)
        Just (DocType
dt, [Maybe Node]
nodes2) -> (Maybe DocType, [Node]) -> Parser (Maybe DocType, [Node])
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType -> Maybe DocType
forall a. a -> Maybe a
Just DocType
dt, [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe Node]
nodes1 [Maybe Node] -> [Maybe Node] -> [Maybe Node]
forall a. [a] -> [a] -> [a]
++ [Maybe Node]
nodes2))


------------------------------------------------------------------------------
-- | Return value is the encoding, if present.
xmlDecl :: Parser (Maybe Text)
xmlDecl :: ParsecT Text () Identity (Maybe Text)
xmlDecl = do
    Text
_ <- Text -> Parser Text
text Text
"<?xml"
    ()
_ <- Parser ()
versionInfo
    Maybe Text
e <- Parser Text -> ParsecT Text () Identity (Maybe Text)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
encodingDecl
    Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
sdDecl
    Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
    Text
_ <- Text -> Parser Text
text Text
"?>"
    Maybe Text -> ParsecT Text () Identity (Maybe Text)
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Text
e


------------------------------------------------------------------------------
versionInfo :: Parser ()
versionInfo :: Parser ()
versionInfo = do
    Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"version" Parser Text -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
eq Parser () -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser ()
singleQuoted Parser () -> Parser () -> Parser ()
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
doubleQuoted)
  where
    singleQuoted :: Parser ()
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' ParsecT Text () Identity Char -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum Parser () -> ParsecT Text () Identity Char -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
    doubleQuoted :: Parser ()
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' ParsecT Text () Identity Char -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum Parser () -> ParsecT Text () Identity Char -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
    versionNum :: Parser ()
versionNum   = do
        Text
_ <- Text -> Parser Text
text Text
"1."
        String
_ <- ParsecT Text () Identity Char -> ParsecT Text () Identity String
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'))
        () -> Parser ()
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()


------------------------------------------------------------------------------
eq :: Parser ()
eq :: Parser ()
eq = Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace ParsecT Text () Identity (Maybe ())
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'=' ParsecT Text () Identity Char
-> ParsecT Text () Identity (Maybe ())
-> ParsecT Text () Identity (Maybe ())
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace ParsecT Text () Identity (Maybe ()) -> Parser () -> Parser ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parser ()
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()


------------------------------------------------------------------------------
misc :: Parser (Maybe Node)
misc :: Parser (Maybe Node)
misc = Parser (Maybe Node)
comment Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
whiteSpace Parser () -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Maybe Node -> Parser (Maybe Node)
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Node
forall a. Maybe a
Nothing)


------------------------------------------------------------------------------
-- | Internal subset is parsed, but ignored since we don't have data types to
-- store it.
docTypeDecl :: Parser DocType
docTypeDecl :: Parser DocType
docTypeDecl = do
    Text
_      <- Text -> Parser Text
text Text
"<!DOCTYPE"
    Parser ()
whiteSpace
    Text
tag    <- Parser Text
name
    Maybe ()
_      <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
    ExternalID
extid  <- Parser ExternalID
externalID
    Maybe ()
_      <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
    InternalSubset
intsub <- Parser InternalSubset
internalDoctype
    Char
_      <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
    DocType -> Parser DocType
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ExternalID -> InternalSubset -> DocType
DocType Text
tag ExternalID
extid InternalSubset
intsub)


------------------------------------------------------------------------------
-- | States for the DOCTYPE internal subset state machine.
data InternalDoctypeState = IDSStart
                          | IDSScanning Int
                          | IDSInQuote Int Char
                          | IDSCommentS1 Int
                          | IDSCommentS2 Int
                          | IDSCommentS3 Int
                          | IDSComment Int
                          | IDSCommentD1 Int
                          | IDSCommentE1 Int


------------------------------------------------------------------------------
-- | Internal DOCTYPE subset.  We don't actually parse this; just scan through
-- and look for the end, and store it in a block of text.
internalDoctype :: Parser InternalSubset
internalDoctype :: Parser InternalSubset
internalDoctype = Text -> InternalSubset
InternalText (Text -> InternalSubset)
-> (String -> Text) -> String -> InternalSubset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Text
T.pack (String -> InternalSubset)
-> ParsecT Text () Identity String -> Parser InternalSubset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> ScanState) -> ParsecT Text () Identity String
scanText (InternalDoctypeState -> Char -> ScanState
dfa InternalDoctypeState
IDSStart)
              Parser InternalSubset
-> Parser InternalSubset -> Parser InternalSubset
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> InternalSubset -> Parser InternalSubset
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return InternalSubset
NoInternalSubset
  where dfa :: InternalDoctypeState -> Char -> ScanState
dfa InternalDoctypeState
IDSStart Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
0))
        dfa InternalDoctypeState
IDSStart Char
_   = String -> ScanState
ScanFail String
"Not a DOCTYPE internal subset"
        dfa (IDSInQuote Int
n Char
c) Char
d
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
d                = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
          | Bool
otherwise             = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
c))
        dfa (IDSScanning Int
n) Char
'['   = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)))
        dfa (IDSScanning Int
0) Char
']'   = ScanState
ScanFinish
        dfa (IDSScanning Int
n) Char
']'   = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)))
        dfa (IDSScanning Int
n) Char
'\''  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
        dfa (IDSScanning Int
n) Char
'\"'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
        dfa (IDSScanning Int
n) Char
'<'   = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS1 Int
n))
        dfa (IDSScanning Int
n) Char
_     = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
        dfa (IDSCommentS1 Int
n) Char
'['  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)))
        dfa (IDSCommentS1 Int
0) Char
']'  = ScanState
ScanFinish
        dfa (IDSCommentS1 Int
n) Char
']'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)))
        dfa (IDSCommentS1 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
        dfa (IDSCommentS1 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
        dfa (IDSCommentS1 Int
n) Char
'!'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS2 Int
n))
        dfa (IDSCommentS1 Int
n) Char
_    = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
        dfa (IDSCommentS2 Int
n) Char
'['  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)))
        dfa (IDSCommentS2 Int
0) Char
']'  = ScanState
ScanFinish
        dfa (IDSCommentS2 Int
n) Char
']'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)))
        dfa (IDSCommentS2 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
        dfa (IDSCommentS2 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
        dfa (IDSCommentS2 Int
n) Char
'-'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS3 Int
n))
        dfa (IDSCommentS2 Int
n) Char
_    = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
        dfa (IDSCommentS3 Int
n) Char
'['  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)))
        dfa (IDSCommentS3 Int
0) Char
']'  = ScanState
ScanFinish
        dfa (IDSCommentS3 Int
n) Char
']'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)))
        dfa (IDSCommentS3 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
        dfa (IDSCommentS3 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
        dfa (IDSCommentS3 Int
n) Char
'-'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
        dfa (IDSCommentS3 Int
n) Char
_    = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
        dfa (IDSComment Int
n) Char
'-'    = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentD1 Int
n))
        dfa (IDSComment Int
n) Char
_      = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
        dfa (IDSCommentD1 Int
n) Char
'-'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentE1 Int
n))
        dfa (IDSCommentD1 Int
n) Char
_    = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
        dfa (IDSCommentE1 Int
n) Char
'>'  = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
        dfa (IDSCommentE1 Int
_) Char
_    = String -> ScanState
ScanFail String
"Poorly formatted comment"


------------------------------------------------------------------------------
sdDecl :: Parser ()
sdDecl :: Parser ()
sdDecl = do
    Text
_ <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"standalone"
    Parser ()
eq
    Text
_ <- Parser Text
single Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
double
    () -> Parser ()
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  where
    single :: Parser Text
single = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
    double :: Parser Text
double = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
    yesno :: Parser Text
yesno  = Text -> Parser Text
text Text
"yes" Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
text Text
"no"


------------------------------------------------------------------------------
element :: Parser Node
element :: Parser Node
element = do
    (Text
t,[(Text, Text)]
a,Bool
b) <- Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag
    if Bool
b then Node -> Parser Node
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [])
         else Text -> [(Text, Text)] -> Parser Node
nonEmptyElem Text
t [(Text, Text)]
a
  where
    nonEmptyElem :: Text -> [(Text, Text)] -> Parser Node
nonEmptyElem Text
t [(Text, Text)]
a = do
        [Node]
c <- Parser [Node]
content
        Text -> Parser ()
endTag Text
t
        Node -> Parser Node
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c)


------------------------------------------------------------------------------
-- | Results are (tag name, attributes, isEmpty)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag = do
    Text
t <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'<' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
name
    [(Text, Text)]
a <- ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity [(Text, Text)]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (ParsecT Text () Identity (Text, Text)
 -> ParsecT Text () Identity [(Text, Text)])
-> ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity (Text, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text () Identity (Text, Text)
 -> ParsecT Text () Identity (Text, Text))
-> ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity (Text, Text)
forall a b. (a -> b) -> a -> b
$ do
        Parser ()
whiteSpace
        ParsecT Text () Identity (Text, Text)
attribute
    Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(Text, Text)] -> Bool
forall {a} {b}. Eq a => [(a, b)] -> Bool
hasDups [(Text, Text)]
a) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ String -> Parser ()
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Duplicate attribute names in element"
    Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
    Maybe Char
e <- ParsecT Text () Identity Char
-> ParsecT Text () Identity (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'/')
    Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
    (Text, [(Text, Text)], Bool) -> Parser (Text, [(Text, Text)], Bool)
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t, [(Text, Text)]
a, Maybe Char -> Bool
forall a. Maybe a -> Bool
isJust Maybe Char
e)
  where
    hasDups :: [(a, b)] -> Bool
hasDups [(a, b)]
a = [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([a] -> [a]
forall a. Eq a => [a] -> [a]
nub (((a, b) -> a) -> [(a, b)] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (a, b) -> a
forall a b. (a, b) -> a
fst [(a, b)]
a)) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [(a, b)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(a, b)]
a


------------------------------------------------------------------------------
attribute :: Parser (Text, Text)
attribute :: ParsecT Text () Identity (Text, Text)
attribute = do
    Text
n <- Parser Text
name
    Parser ()
eq
    Text
v <- Parser Text
attrValue
    (Text, Text) -> ParsecT Text () Identity (Text, Text)
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
n,Text
v)


------------------------------------------------------------------------------
endTag :: Text -> Parser ()
endTag :: Text -> Parser ()
endTag Text
s = do
    Text
_ <- Text -> Parser Text
text Text
"</"
    Text
t <- Parser Text
name
    Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text
s Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
t) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ String -> Parser ()
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ()) -> String -> Parser ()
forall a b. (a -> b) -> a -> b
$ String
"mismatched tags: </" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
t String -> String -> String
forall a. [a] -> [a] -> [a]
++
                           String
"> found inside <" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"> tag"
    Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
    Text
_ <- Text -> Parser Text
text Text
">"
    () -> Parser ()
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()


------------------------------------------------------------------------------
content :: Parser [Node]
content :: Parser [Node]
content = do
    Maybe Node
n  <- Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
    [Maybe Node]
ns <- ([[Maybe Node]] -> [Maybe Node])
-> ParsecT Text () Identity [[Maybe Node]]
-> ParsecT Text () Identity [Maybe Node]
forall a b.
(a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Maybe Node]] -> [Maybe Node]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (ParsecT Text () Identity [[Maybe Node]]
 -> ParsecT Text () Identity [Maybe Node])
-> ParsecT Text () Identity [[Maybe Node]]
-> ParsecT Text () Identity [Maybe Node]
forall a b. (a -> b) -> a -> b
$ ParsecT Text () Identity [Maybe Node]
-> ParsecT Text () Identity [[Maybe Node]]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (ParsecT Text () Identity [Maybe Node]
 -> ParsecT Text () Identity [[Maybe Node]])
-> ParsecT Text () Identity [Maybe Node]
-> ParsecT Text () Identity [[Maybe Node]]
forall a b. (a -> b) -> a -> b
$ do
        Maybe Node
s <- ((Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> (Text -> Node) -> Text -> Maybe Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
TextNode (Text -> Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
reference)
               Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
cdSect
               Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction
               Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
comment
               Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Node -> Maybe Node) -> Parser Node -> Parser (Maybe Node)
forall a b.
(a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Node -> Maybe Node
forall a. a -> Maybe a
Just Parser Node
element)
        Maybe Node
t <- Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
        [Maybe Node] -> ParsecT Text () Identity [Maybe Node]
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return [Maybe Node
s,Maybe Node
t]
    [Node] -> Parser [Node]
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Parser [Node]) -> [Node] -> Parser [Node]
forall a b. (a -> b) -> a -> b
$ [Node] -> [Node]
coalesceText ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$ [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes (Maybe Node
nMaybe Node -> [Maybe Node] -> [Maybe Node]
forall a. a -> [a] -> [a]
:[Maybe Node]
ns)
  where
    coalesceText :: [Node] -> [Node]
coalesceText (TextNode Text
s : TextNode Text
t : [Node]
ns)
        = [Node] -> [Node]
coalesceText (Text -> Node
TextNode (Text -> Text -> Text
T.append Text
s Text
t) Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [Node]
ns)
    coalesceText (Node
n:[Node]
ns)
        = Node
n Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [Node] -> [Node]
coalesceText [Node]
ns
    coalesceText []
        = []


------------------------------------------------------------------------------
charRef :: Parser Text
charRef :: Parser Text
charRef = Parser Text
hexCharRef Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
decCharRef
  where
    decCharRef :: Parser Text
decCharRef = do
        Text
_ <- Text -> Parser Text
text Text
"&#"
        [Int]
ds <- ParsecT Text () Identity Int -> ParsecT Text () Identity [Int]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ParsecT Text () Identity Int
forall {u}. ParsecT Text u Identity Int
digit
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
        Char
c <- Int -> ParsecT Text () Identity Char
safeChr (Int -> ParsecT Text () Identity Char)
-> Int -> ParsecT Text () Identity Char
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
        Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ String -> Parser ()
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ()) -> String -> Parser ()
forall a b. (a -> b) -> a -> b
$
            String
"Reference is not a valid character"
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
      where
        digit :: ParsecT Text u Identity Int
digit = do
            Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
            Int -> ParsecT Text u Identity Int
forall a. a -> ParsecT Text u Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
    hexCharRef :: Parser Text
hexCharRef = do
        Text
_ <- Text -> Parser Text
text Text
"&#x"
        [Int]
ds <- ParsecT Text () Identity Int -> ParsecT Text () Identity [Int]
forall a.
ParsecT Text () Identity a -> ParsecT Text () Identity [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ParsecT Text () Identity Int
forall {u}. ParsecT Text u Identity Int
digit
        Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
        Char
c <- Int -> ParsecT Text () Identity Char
safeChr (Int -> ParsecT Text () Identity Char)
-> Int -> ParsecT Text () Identity Char
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
16 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
        Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ String -> Parser ()
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ()) -> String -> Parser ()
forall a b. (a -> b) -> a -> b
$
            String
"Reference is not a valid character"
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
      where
        digit :: ParsecT Text u Identity Int
digit = ParsecT Text u Identity Int
forall {u}. ParsecT Text u Identity Int
num ParsecT Text u Identity Int
-> ParsecT Text u Identity Int -> ParsecT Text u Identity Int
forall a.
ParsecT Text u Identity a
-> ParsecT Text u Identity a -> ParsecT Text u Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text u Identity Int
forall {u}. ParsecT Text u Identity Int
upper ParsecT Text u Identity Int
-> ParsecT Text u Identity Int -> ParsecT Text u Identity Int
forall a.
ParsecT Text u Identity a
-> ParsecT Text u Identity a -> ParsecT Text u Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text u Identity Int
forall {u}. ParsecT Text u Identity Int
lower
        num :: ParsecT Text u Identity Int
num = do
            Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
            Int -> ParsecT Text u Identity Int
forall a. a -> ParsecT Text u Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
        upper :: ParsecT Text u Identity Int
upper = do
            Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'F')
            Int -> ParsecT Text u Identity Int
forall a. a -> ParsecT Text u Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A')
        lower :: ParsecT Text u Identity Int
lower = do
            Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'f')
            Int -> ParsecT Text u Identity Int
forall a. a -> ParsecT Text u Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a')


------------------------------------------------------------------------------
reference :: Parser Text
reference :: Parser Text
reference = Parser Text
charRef Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
entityRef


------------------------------------------------------------------------------
entityRef :: Parser Text
entityRef :: Parser Text
entityRef = do
    Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'&'
    Text
n <- Parser Text
name
    Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
    case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
n Map Text Text
entityRefLookup of
        Maybe Text
Nothing -> String -> Parser Text
forall a. String -> ParsecT Text () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser Text) -> String -> Parser Text
forall a b. (a -> b) -> a -> b
$ String
"Unknown entity reference: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
n
        Just Text
t  -> Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
  where
    entityRefLookup :: Map Text Text
    entityRefLookup :: Map Text Text
entityRefLookup = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [
        (Text
"amp", Text
"&"),
        (Text
"lt", Text
"<"),
        (Text
"gt", Text
">"),
        (Text
"apos", Text
"\'"),
        (Text
"quot", Text
"\"")
        ]


------------------------------------------------------------------------------
externalID :: Parser ExternalID
externalID :: Parser ExternalID
externalID = Parser ExternalID
systemID Parser ExternalID -> Parser ExternalID -> Parser ExternalID
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ExternalID
publicID Parser ExternalID -> Parser ExternalID -> Parser ExternalID
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ExternalID -> Parser ExternalID
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ExternalID
NoExternalID
  where
    systemID :: Parser ExternalID
systemID = do
        Text
_ <- Text -> Parser Text
text Text
"SYSTEM"
        Parser ()
whiteSpace
        (Text -> ExternalID) -> Parser Text -> Parser ExternalID
forall a b.
(a -> b)
-> ParsecT Text () Identity a -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ExternalID
System Parser Text
systemLiteral
    publicID :: Parser ExternalID
publicID = do
        Text
_ <- Text -> Parser Text
text Text
"PUBLIC"
        Parser ()
whiteSpace
        Text
pid <- Parser Text
pubIdLiteral
        Parser ()
whiteSpace
        Text
sid <- Parser Text
systemLiteral
        ExternalID -> Parser ExternalID
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text -> ExternalID
Public Text
pid Text
sid)


------------------------------------------------------------------------------
encodingDecl :: Parser Text
encodingDecl :: Parser Text
encodingDecl = do
    Text
_ <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"encoding"
    ()
_ <- Parser ()
eq
    Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Text () Identity a
-> ParsecT Text () Identity a -> ParsecT Text () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
  where
    singleQuoted :: Parser Text
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
    doubleQuoted :: Parser Text
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
    encName :: Parser Text
encName      = do
        Char
c  <- (Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isEncStart
        Text
cs <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isEnc
        Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
T.cons Char
c Text
cs)
    isEncStart :: Char -> Bool
isEncStart Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
                 | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
                 | Bool
otherwise = Bool
False
    isEnc :: Char -> Bool
isEnc      Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
                 | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
                 | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9' = Bool
True
                 | Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'.',Char
'_',Char
'-'] = Bool
True
                 | Bool
otherwise = Bool
False