{-# LANGUAGE LambdaCase            #-}
{-# LANGUAGE OverloadedStrings     #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Parsing
   Copyright   : Copyright (C) 2006-2022 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Parsing functions and utilities.
-}
module Text.Pandoc.Readers.HTML.Parsing
  ( TagOmission (..)
  , pInTags
  , pInTags'
  , pInTag
  , pInTagWithAttribs
  , pAny
  , pCloses
  , pSatisfy
  , pBlank
  , matchTagClose
  , matchTagOpen
  , isSpace
  , maybeFromAttrib
  , toAttr
  , toStringAttr
  )
where

import Control.Monad (void, mzero, mplus)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
  ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) )
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition (Attr)
import Text.Pandoc.Parsing
  ( (<|>), eof, getPosition, lookAhead, manyTill, newPos, option, optional
  , skipMany, setPosition, token, try)
import Text.Pandoc.Readers.HTML.TagCategories
import Text.Pandoc.Readers.HTML.Types
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (html5Attributes, html4Attributes, rdfaAttributes)
import qualified Data.Set as Set
import qualified Data.Text as T

-- | Whether no tag, the closing tag, or both tags can be omitted.
data TagOmission
  = TagsRequired       -- ^ Opening and closing tags are both required
  | ClosingTagOptional -- ^ The closing tag can be omitted
  | TagsOmittable      -- ^ Both tags, opening and closing, can be omitted.
  deriving (TagOmission -> TagOmission -> Bool
(TagOmission -> TagOmission -> Bool)
-> (TagOmission -> TagOmission -> Bool) -> Eq TagOmission
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TagOmission -> TagOmission -> Bool
$c/= :: TagOmission -> TagOmission -> Bool
== :: TagOmission -> TagOmission -> Bool
$c== :: TagOmission -> TagOmission -> Bool
Eq)

pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a
pInTags :: Text -> TagParser m a -> TagParser m a
pInTags tagtype :: Text
tagtype parser :: TagParser m a
parser = Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
forall (m :: * -> *) a.
(PandocMonad m, Monoid a) =>
Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
pInTags' Text
tagtype (Bool -> Tag Text -> Bool
forall a b. a -> b -> a
const Bool
True) TagParser m a
parser

pInTags' :: (PandocMonad m, Monoid a)
         => Text
         -> (Tag Text -> Bool)
         -> TagParser m a
         -> TagParser m a
pInTags' :: Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
pInTags' tagtype :: Text
tagtype tagtest :: Tag Text -> Bool
tagtest parser :: TagParser m a
parser = TagParser m a -> TagParser m a
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m a -> TagParser m a) -> TagParser m a -> TagParser m a
forall a b. (a -> b) -> a -> b
$ do
  (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy ((Tag Text -> Bool) -> TagParser m (Tag Text))
-> (Tag Text -> Bool) -> TagParser m (Tag Text)
forall a b. (a -> b) -> a -> b
$ \t :: Tag Text
t -> Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen Text
tagtype [] Tag Text
t Bool -> Bool -> Bool
&& Tag Text -> Bool
tagtest Tag Text
t
  [a] -> a
forall a. Monoid a => [a] -> a
mconcat ([a] -> a)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [a]
-> TagParser m a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TagParser m a
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [a]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill TagParser m a
parser (Text -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => Text -> TagParser m ()
pCloses Text
tagtype ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof)

pInTag :: PandocMonad m
       => TagOmission    -- ^ Whether some tags can be omitted
       -> Text           -- ^ @tagtype@ Tag name
       -> TagParser m a  -- ^ @p@ Content parser
       -> TagParser m a
pInTag :: TagOmission -> Text -> TagParser m a -> TagParser m a
pInTag tagOmission :: TagOmission
tagOmission tagtype :: Text
tagtype = (([(Text, Text)], a) -> a)
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
-> TagParser m a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([(Text, Text)], a) -> a
forall a b. (a, b) -> b
snd (ParsecT
   [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
 -> TagParser m a)
-> (TagParser m a
    -> ParsecT
         [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a))
-> TagParser m a
-> TagParser m a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TagOmission
-> Text
-> TagParser m a
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
forall (m :: * -> *) a.
PandocMonad m =>
TagOmission
-> Text -> TagParser m a -> TagParser m ([(Text, Text)], a)
pInTagWithAttribs TagOmission
tagOmission Text
tagtype

-- | Returns the contents of a tag together with its attributes; parses
-- @p@, preceded by an opening tag (optional if TagsOmittable) and
-- followed by a closing tag (optional unless TagsRequired).
pInTagWithAttribs :: PandocMonad m
                  => TagOmission    -- ^ Whether some tags can be omitted
                  -> Text           -- ^ @tagtype@ Tag name
                  -> TagParser m a  -- ^ @p@ Content parser
                  -> TagParser m ([Attribute Text], a)
pInTagWithAttribs :: TagOmission
-> Text -> TagParser m a -> TagParser m ([(Text, Text)], a)
pInTagWithAttribs tagOmission :: TagOmission
tagOmission tagtype :: Text
tagtype p :: TagParser m a
p = TagParser m ([(Text, Text)], a) -> TagParser m ([(Text, Text)], a)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m ([(Text, Text)], a)
 -> TagParser m ([(Text, Text)], a))
-> TagParser m ([(Text, Text)], a)
-> TagParser m ([(Text, Text)], a)
forall a b. (a -> b) -> a -> b
$ do
  let openingOptional :: Bool
openingOptional = TagOmission
tagOmission TagOmission -> TagOmission -> Bool
forall a. Eq a => a -> a -> Bool
== TagOmission
TagsOmittable
  let closingOptional :: Bool
closingOptional = TagOmission
tagOmission TagOmission -> TagOmission -> Bool
forall a. Eq a => a -> a -> Bool
/= TagOmission
TagsRequired
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [(Text, Text)]
attribs <- (if Bool
openingOptional then [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [] else ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall a. a -> a
id)
             (Tag Text -> [(Text, Text)]
forall str. Tag str -> [Attribute str]
getAttribs (Tag Text -> [(Text, Text)])
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen Text
tagtype []))
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  a
x <- TagParser m a
p
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  (if Bool
closingOptional then ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional else ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void) (ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ())
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall a b. (a -> b) -> a -> b
$
    (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
tagtype)
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  ([(Text, Text)], a) -> TagParser m ([(Text, Text)], a)
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Text, Text)]
attribs, a
x)
  where
    getAttribs :: Tag str -> [Attribute str]
getAttribs = \case
      TagOpen _ attribs :: [Attribute str]
attribs -> [Attribute str]
attribs
      _                 -> []

pCloses :: PandocMonad m => Text -> TagParser m ()
pCloses :: Text -> TagParser m ()
pCloses tagtype :: Text
tagtype = TagParser m () -> TagParser m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m () -> TagParser m ())
-> TagParser m () -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ do
  Tag Text
t <- ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text))
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall a b. (a -> b) -> a -> b
$ (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy ((Tag Text -> Bool)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text))
-> (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall a b. (a -> b) -> a -> b
$ \tag :: Tag Text
tag -> Tag Text -> Bool
forall str. Tag str -> Bool
isTagClose Tag Text
tag Bool -> Bool -> Bool
|| Tag Text -> Bool
forall str. Tag str -> Bool
isTagOpen Tag Text
tag
  case Tag Text
t of
       (TagClose t' :: Text
t') | Text
t' Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
tagtype -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> TagParser m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *). PandocMonad m => TagParser m (Tag Text)
pAny
       (TagOpen t' :: Text
t' _) | Text
t' Text -> Text -> Bool
`closes` Text
tagtype -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "ul") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "li" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "ol") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "li" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "dl") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "dd" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "td" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "th" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "tr" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "td") | Text
tagtype Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose "th") | Text
tagtype Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose t' :: Text
t') | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "p" Bool -> Bool -> Bool
&& Text
t' Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags
                                            -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return () -- see #3794
       _ -> TagParser m ()
forall (m :: * -> *) a. MonadPlus m => m a
mzero

pBlank :: PandocMonad m => TagParser m ()
pBlank :: TagParser m ()
pBlank = ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> TagParser m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
 -> TagParser m ())
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> TagParser m ()
forall a b. (a -> b) -> a -> b
$ (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy Tag Text -> Bool
isBlank
 where
  isBlank :: Tag Text -> Bool
isBlank (TagText t :: Text
t) = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
t
  isBlank (TagComment _) = Bool
True
  isBlank _ = Bool
False

pLocation :: PandocMonad m => TagParser m ()
pLocation :: TagParser m ()
pLocation = do
  (TagPosition r :: Row
r c :: Row
c) <- (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSat Tag Text -> Bool
forall str. Tag str -> Bool
isTagPosition
  SourcePos -> TagParser m ()
forall (m :: * -> *) s u. Monad m => SourcePos -> ParsecT s u m ()
setPosition (SourcePos -> TagParser m ()) -> SourcePos -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ SourceName -> Row -> Row -> SourcePos
newPos "input" Row
r Row
c

pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat :: (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat f :: Tag Text -> Bool
f = do
  SourcePos
pos <- ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  (Tag Text -> Text)
-> (Tag Text -> SourcePos)
-> (Tag Text -> Maybe (Tag Text))
-> TagParser m (Tag Text)
forall s (m :: * -> *) t a st.
Stream s m t =>
(t -> Text)
-> (t -> SourcePos) -> (t -> Maybe a) -> ParsecT s st m a
token Tag Text -> Text
forall a. Show a => a -> Text
tshow (SourcePos -> Tag Text -> SourcePos
forall a b. a -> b -> a
const SourcePos
pos) (\x :: Tag Text
x -> if Tag Text -> Bool
f Tag Text
x then Tag Text -> Maybe (Tag Text)
forall a. a -> Maybe a
Just Tag Text
x else Maybe (Tag Text)
forall a. Maybe a
Nothing)

pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy :: (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy f :: Tag Text -> Bool
f = TagParser m (Tag Text) -> TagParser m (Tag Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m (Tag Text) -> TagParser m (Tag Text))
-> TagParser m (Tag Text) -> TagParser m (Tag Text)
forall a b. (a -> b) -> a -> b
$ ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pLocation ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> TagParser m (Tag Text) -> TagParser m (Tag Text)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSat Tag Text -> Bool
f

matchTagClose :: Text -> (Tag Text -> Bool)
matchTagClose :: Text -> Tag Text -> Bool
matchTagClose t :: Text
t = (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== Text -> Tag Text
forall str. str -> Tag str
TagClose Text
t)

matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool)
matchTagOpen :: Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen t :: Text
t as :: [(Text, Text)]
as = (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== Text -> [(Text, Text)] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
t [(Text, Text)]
as)

pAny :: PandocMonad m => TagParser m (Tag Text)
pAny :: TagParser m (Tag Text)
pAny = (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Bool -> Tag Text -> Bool
forall a b. a -> b -> a
const Bool
True)

isSpace :: Char -> Bool
isSpace :: Char -> Bool
isSpace ' '  = Bool
True
isSpace '\t' = Bool
True
isSpace '\n' = Bool
True
isSpace '\r' = Bool
True
isSpace _    = Bool
False

-- taken from HXT and extended
-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
closes :: Text -> Text -> Bool
_ closes :: Text -> Text -> Bool
`closes` "body" = Bool
False
_ `closes` "html" = Bool
False
"body" `closes` "head" = Bool
True
"a" `closes` "a" = Bool
True
"li" `closes` "li" = Bool
True
"th" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["th","td"] = Bool
True
"td" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["th","td"] = Bool
True
"tr" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["th","td","tr","colgroup"] = Bool
True
"dd" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["dt", "dd"] = Bool
True
"dt" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["dt","dd"] = Bool
True
"rt" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["rb", "rt", "rtc"] = Bool
True
"col" `closes` "col" = Bool
True
"colgroup" `closes` "col" = Bool
True
"optgroup" `closes` "optgroup" = Bool
True
"optgroup" `closes` "option" = Bool
True
"option" `closes` "option" = Bool
True
-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
x :: Text
x `closes` "p" | Text
x Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["address", "article", "aside", "blockquote",
   "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
   "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", "p", "pre", "section",
   "table", "ul"] = Bool
True
_ `closes` "meta" = Bool
True
"form" `closes` "form" = Bool
True
"label" `closes` "label" = Bool
True
"map" `closes` "map" = Bool
True
"object" `closes` "object" = Bool
True
_ `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["option","style","script","textarea","title"] = Bool
True
t :: Text
t `closes` "select" | Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= "option" = Bool
True
"thead" `closes` "colgroup" = Bool
True
"tfoot" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["thead","colgroup"] = Bool
True
"tbody" `closes` t :: Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["tbody","tfoot","thead","colgroup"] = Bool
True
t :: Text
t `closes` t2 :: Text
t2 |
   Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","main","p"] Bool -> Bool -> Bool
&&
   Text
t2 Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ["h1","h2","h3","h4","h5","h6","p" ] = Bool
True -- not "div" or "main"
t1 :: Text
t1 `closes` t2 :: Text
t2 |
   Text
t1 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockTags Bool -> Bool -> Bool
&&
   Text
t2 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set Text
blockTags Bool -> Bool -> Bool
&&
   Text
t2 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set Text
eitherBlockOrInline = Bool
True
_ `closes` _ = Bool
False

toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr = ((Text, Text) -> [(Text, Text)] -> [(Text, Text)])
-> [(Text, Text)] -> [(Text, Text)] -> [(Text, Text)]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go []
  where
   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
   -- treat xml:lang as lang
   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go ("xml:lang",y :: Text
y) ats :: [(Text, Text)]
ats = (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go ("lang",Text
y) [(Text, Text)]
ats
   -- prevent duplicate attributes
   go (x :: Text
x,y :: Text
y) ats :: [(Text, Text)]
ats
     | ((Text, Text) -> Bool) -> [(Text, Text)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\(x' :: Text
x',_) -> Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
x') [(Text, Text)]
ats = [(Text, Text)]
ats
     | Bool
otherwise      =
        case Text -> Text -> Maybe Text
T.stripPrefix "data-" Text
x of
          Just x' :: Text
x' | Text
x' Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` (Set Text
html5Attributes Set Text -> Set Text -> Set Text
forall a. Semigroup a => a -> a -> a
<>
                                        Set Text
html4Attributes Set Text -> Set Text -> Set Text
forall a. Semigroup a => a -> a -> a
<> Set Text
rdfaAttributes)
            -> (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go (Text
x',Text
y) [(Text, Text)]
ats
          _ -> (Text
x,Text
y)(Text, Text) -> [(Text, Text)] -> [(Text, Text)]
forall a. a -> [a] -> [a]
:[(Text, Text)]
ats

-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
maybeFromAttrib :: Text -> Tag Text -> Maybe Text
maybeFromAttrib :: Text -> Tag Text -> Maybe Text
maybeFromAttrib name :: Text
name (TagOpen _ attrs :: [(Text, Text)]
attrs) = Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
name [(Text, Text)]
attrs
maybeFromAttrib _ _ = Maybe Text
forall a. Maybe a
Nothing

mkAttr :: [(Text, Text)] -> Attr
mkAttr :: [(Text, Text)] -> Attr
mkAttr attr :: [(Text, Text)]
attr = (Text
attribsId, [Text]
attribsClasses, [(Text, Text)]
attribsKV)
  where attribsId :: Text
attribsId = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe "" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "id" [(Text, Text)]
attr Maybe Text -> Maybe Text -> Maybe Text
forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
`mplus` Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "name" [(Text, Text)]
attr
        attribsClasses :: [Text]
attribsClasses = Text -> [Text]
T.words (Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe "" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "class" [(Text, Text)]
attr) [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
epubTypes
        attribsKV :: [(Text, Text)]
attribsKV = ((Text, Text) -> Bool) -> [(Text, Text)] -> [(Text, Text)]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(k :: Text
k,_) -> Text
k Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= "class" Bool -> Bool -> Bool
&& Text
k Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= "id" Bool -> Bool -> Bool
&& Text
k Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= "name")
                           [(Text, Text)]
attr
        epubTypes :: [Text]
epubTypes = Text -> [Text]
T.words (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe "" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "epub:type" [(Text, Text)]
attr

toAttr :: [(Text, Text)] -> Attr
toAttr :: [(Text, Text)] -> Attr
toAttr = [(Text, Text)] -> Attr
mkAttr ([(Text, Text)] -> Attr)
-> ([(Text, Text)] -> [(Text, Text)]) -> [(Text, Text)] -> Attr
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Text, Text)] -> [(Text, Text)]
toStringAttr