{-# LANGUAGE TupleSections     #-}
{-# LANGUAGE ViewPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.EPUB
   Copyright   : Copyright (C) 2014-2020 Matthew Pickering
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of EPUB to 'Pandoc' document.
-}

module Text.Pandoc.Readers.EPUB
  (readEPUB)
  where

import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                          toArchiveOrFail)
import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (mapMaybe)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Network.URI (unEscapeString)
import System.FilePath (dropFileName, dropFileName, normalise, splitFileName,
                        takeFileName, (</>))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia)
import Text.Pandoc.Definition hiding (Attr)
import Text.Pandoc.Error
import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
import Text.Pandoc.Walk (query, walk)
import Text.Pandoc.XML.Light

-- | Manifest items, keyed by the item's @id@ attribute; each value is the
-- item's @href@ (as a 'FilePath') paired with its @media-type@.
type Items = M.Map Text (FilePath, MimeType)

-- | Read an EPUB file, given as the raw bytes of its zip container, into a
-- 'Pandoc' document.
--
-- Throws 'PandocParseError' when the bytes cannot be opened as a zip
-- archive; the message includes the reason reported by 'toArchiveOrFail'.
readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB opts bytes =
  case toArchiveOrFail bytes of
    Right archive -> archiveToEPUB opts archive
    Left err      -> throwError $ PandocParseError $
                       "Couldn't extract ePub file: " <> T.pack err

-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept

-- Note that internal references are aggressively normalised so that all ids
-- are of the form "filename#id".
--
-- | Build a 'Pandoc' document from an opened EPUB archive.
--
-- The package document is located via 'getManifest'; its metadata, manifest
-- and spine are parsed; every spine item is read in order and appended to
-- the result; finally the images referenced by the document are handed to
-- 'fetchImages'.  If the manifest names a cover image, a paragraph holding
-- that image is placed in front of the spine content.
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB os archive = do
  -- root is path to folder with manifest file in
  (root, content) <- getManifest archive
  (coverId, meta) <- parseMeta content
  (cover, items)  <- parseManifest content coverId
  -- No need to collapse here as the image path is from the manifest file
  let coverDoc = maybe mempty imageToPandoc cover
  spine <- parseSpine items content
  let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine
      -- Read one spine item, rewrite links that start with a spine file
      -- name into "#..." links, and append the result to the accumulator.
      addSpineItem acc item =
        ((acc <>) . walk (prependHash escapedSpine))
          `liftM` parseSpineElem root item
  -- Only the blocks of the folded document are kept; the metadata comes
  -- from the package document.
  Pandoc _ bs <- foldM' addSpineItem mempty spine
  let ast = coverDoc <> Pandoc meta bs
  fetchImages (M.elems items) root archive ast
  return ast
  where
    -- The caller's options with the raw_html extension switched on.
    os' = os { readerExtensions =
                 enableExtension Ext_raw_html (readerExtensions os) }

    -- Read a single spine item and prefix it with an empty span whose
    -- identifier is the item's file name (presumably the anchor that
    -- whole-file links resolve to; confirm against 'prependHash').
    parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
    parseSpineElem (normalise -> r) (normalise -> path, mime) = do
      doc <- mimeToReader mime r path
      let marker = B.spanWith (T.pack (takeFileName path), [], []) mempty
          docSpan = B.doc (B.para marker)
      return (docSpan <> doc)

    -- Choose how to read an item from its media type: XHTML goes through
    -- the HTML reader, known image types become an image paragraph, and
    -- anything else contributes nothing.
    -- NOTE(review): TL.decodeUtf8 raises on invalid UTF-8; a lenient decoder
    -- may be preferable here.
    mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
    mimeToReader "application/xhtml+xml" (unEscapeString -> root)
                                         (unEscapeString -> path) = do
      fname <- findEntryByPathE (root </> path) archive
      html <- readHtml os' (TL.toStrict (TL.decodeUtf8 (fromEntry fname)))
      return (fixInternalReferences path html)
    mimeToReader s _ (unEscapeString -> path)
      | s `elem` imageMimes = return (imageToPandoc path)
      | otherwise           = return mempty

-- | Pass every image referenced in the document to 'insertMedia'.
--
-- Image paths should already be absolute (relative to the root) when this
-- function is called; 'renameImages' should do this.  Each path is joined
-- to the root, unescaped and normalised before being looked up in the
-- archive.  Images with no matching archive entry are skipped silently.
-- The MIME type is the one the manifest lists for exactly that path, if any.
fetchImages :: PandocMonad m
            => [(FilePath, MimeType)]
            -> FilePath -- ^ Root
            -> Archive
            -> Pandoc
            -> m ()
fetchImages mimes root arc (query iq -> links) =
    mapM_ (uncurry3 insertMedia) (mapMaybe getEntry links)
  where
    -- Location of a document-relative image path inside the archive.
    archivePath = normalise . unEscapeString . (root </>)
    -- Media triple for one link, or Nothing when the archive lacks the file.
    getEntry link = do
      entry <- findEntryByPath (archivePath link) arc
      return (link, lookup link mimes, fromEntry entry)

-- | Query function for 'query': yields the target URL of an 'Image' as a
-- singleton list and nothing for any other inline.
iq :: Inline -> [FilePath]
iq (Image _ _ (url, _)) = [T.unpack url]
iq _                    = []

-- | Remove relative paths from image targets: the URL is joined to @root@
-- and collapsed with 'collapseFilePath'.  Targets beginning with @data:@
-- and all non-image inlines are returned unchanged.
renameImages :: FilePath -> Inline -> Inline
renameImages root img@(Image attr a (url, b))
  | "data:" `T.isPrefixOf` url = img
  | otherwise                  = Image attr a (collapsed, b)
  where
    collapsed = T.pack (collapseFilePath (root </> T.unpack url))
renameImages _ x = x

-- | A document consisting of one paragraph that holds a single image with
-- the given path as its URL, an empty title and no alt text.
imageToPandoc :: FilePath -> Pandoc
imageToPandoc s = B.doc (B.para (B.image (T.pack s) "" mempty))

-- | Media types that are turned into an image paragraph when they are
-- encountered as a spine item.
imageMimes :: [MimeType]
imageMimes =
  [ "image/gif"
  , "image/jpeg"
  , "image/png"
  ]

-- | Identifier of the cover, taken from the @content@ attribute of the
-- metadata child whose @name@ is @cover@ and compared against manifest
-- item @id@s.
type CoverId = Text

-- | Path of the cover image as returned by 'parseManifest'.
type CoverImage = FilePath

-- | Parse the @manifest@ element of the package document.
--
-- Returns the cover image path (if one can be determined) together with a
-- map from item @id@ to its @href@ and @media-type@.  Fails, via
-- 'findElementE' / 'findAttrE', when the manifest element is missing or an
-- item lacks one of its @id@, @href@ or @media-type@ attributes.
--
-- The cover is the @href@ of the first manifest child whose @properties@
-- attribute contains @cover-image@ or whose @id@ equals the given cover id.
-- NOTE(review): when no such child exists the cover id itself is returned
-- in place of a path (the 'mplus' fallback) -- confirm this is intended.
parseManifest :: (PandocMonad m)
              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
parseManifest content coverId = do
  manifest <- findElementE (dfName "manifest") content
  entries  <- mapM parseItem (findChildren (dfName "item") manifest)
  let cover = filterChild findCover manifest >>= findAttr (emptyName "href")
  return (T.unpack <$> (cover `mplus` coverId), M.fromList entries)
  where
    -- Does this manifest child denote the cover image?
    findCover e = hasCoverProperty e || hasCoverId e
    hasCoverProperty e =
      maybe False (T.isInfixOf "cover-image")
                  (findAttr (emptyName "properties") e)
    hasCoverId e =
      Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
    -- One manifest item as (id, (href, media-type)).
    parseItem e = do
      uid  <- findAttrE (emptyName "id") e
      href <- findAttrE (emptyName "href") e
      mime <- findAttrE (emptyName "media-type") e
      return (uid, (T.unpack href, mime))

-- | Resolve the @spine@ of the package document to the manifest entries it
-- refers to, in spine order.
--
-- An @itemref@ is skipped when its @linear@ attribute is present with a
-- value other than @yes@, or when it has no @idref@ attribute.  Fails with
-- 'PandocParseError' when the spine element is missing or an @idref@ does
-- not name a manifest item.
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
  spine <- findElementE (dfName "spine") e
  let idrefs = mapMaybe parseItemRef (findChildren (dfName "itemref") spine)
  mapM lookupItem idrefs
  where
    -- Look an idref up in the manifest, naming it in the error on failure.
    lookupItem idref =
      mkE ("parseSpine: no manifest item with id " <> idref)
          (M.lookup idref is)
    -- The idref of a linear itemref; Nothing for non-linear ones.
    parseItemRef ref = do
      let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
      guard linear
      findAttr (emptyName "idref") ref

-- | Parse the @metadata@ element of the package document.
--
-- Returns the cover id (the @content@ attribute of the first child whose
-- @name@ attribute is @cover@, if any) and a 'Meta' built by folding
-- 'parseMetaItem' over the children whose name has the @dc@ prefix bound
-- to the Dublin Core elements namespace.  Fails via 'findElementE' when
-- the metadata element is missing.
parseMeta :: PandocMonad m => Element -> m (Maybe CoverId, Meta)
parseMeta content = do
  meta <- findElementE (dfName "metadata") content
  let dcs     = filterChildrenName dcspace meta
      r       = foldr parseMetaItem nullMeta dcs
      coverId = filterChild findCover meta >>= findAttr (emptyName "content")
  return (coverId, r)
  where
    -- Accept only names with exactly this namespace URI and prefix.
    dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
    dcspace _ = False
    findCover e = findAttr (emptyName "name") e == Just "cover"

-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
-- | Add one metadata element to a 'Meta'.  The field name is the element's
-- local name (passed through 'renameMeta') and the value is the element's
-- text content as a single 'B.str'.
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
  addMetaField (renameMeta field) (B.str (strContent e)) meta

-- | Translate a metadata element name into the metadata field name used in
-- the result: @creator@ becomes @author@; every other name is kept as is.
renameMeta :: Text -> Text
renameMeta "creator" = "author"
renameMeta other     = other

-- | Locate and parse the package document of an EPUB archive.
--
-- Reads @META-INF/container.xml@, finds the @rootfile@ element in the
-- namespace given by the document element's @xmlns@ attribute, and follows
-- its @full-path@ attribute.  Returns the directory part of that path
-- together with the parsed package document.
--
-- Fails with a 'PandocError' when the container file, the @xmlns@
-- attribute, the @rootfile@ element, its @full-path@ attribute or the
-- package file itself is missing, or when either XML file does not parse.
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
  metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
  docElem   <- parseXMLDocE metaEntry
  let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
  ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
  rootfile <- findElementE (QName "rootfile" (Just ns) Nothing) docElem
  let as = map attrToPair (elAttribs rootfile)
  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
  let rootdir = dropFileName manifestFile
  --mime <- lookup "media-type" as
  manifest <- findEntryByPathE manifestFile archive
  (rootdir,) <$> parseXMLDocE manifest

-- Fixup

-- | Normalise the identifiers, fragment links and image paths of a document
-- read from the archive file at @pathToFile@.
--
-- Three passes are applied, in this order: 'fixInlineIRs' and 'fixBlockIRs'
-- with the URI-escaped file name, then 'renameImages' with the file's
-- directory.
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
    walk (renameImages root)
  . walk (fixBlockIRs filename)
  . walk (fixInlineIRs filename)
  where
    -- Directory part and URI-escaped file-name part of the path.
    (root, T.unpack . escapeURI . T.pack -> filename) =
      splitFileName pathToFile

-- | Qualify inline identifiers and fragment links with the file name @s@.
--
-- The attributes of 'Span', 'Code' and 'Link' go through 'fixAttrs'.  A
-- link whose target starts with @#@ additionally has that target replaced
-- by 'addHash' applied to the text after the @#@.  Other inlines are left
-- untouched.
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs s inline =
  case inline of
    Span as v      -> Span (fixAttrs s as) v
    Code as code   -> Code (fixAttrs s as) code
    -- fragment-only link: qualify the fragment with the file name
    Link as is (T.uncons -> Just ('#', url), tit)
                   -> Link (fixAttrs s as) is (addHash s url, tit)
    Link as is t   -> Link (fixAttrs s as) is t
    _              -> inline

-- | Turn a link into a fragment link by prefixing its URL with @#@ when the
-- URL starts with any of the given prefixes.  Links that match no prefix,
-- and all non-link inlines, are returned unchanged.
prependHash :: [Text] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
  | any (`T.isPrefixOf` url) ps = Link attr is ("#" <> url, tit)
  | otherwise                   = l
prependHash _ i = i

-- | Run the attributes of 'Div', 'Header' and 'CodeBlock' blocks through
-- 'fixAttrs' with the file name @s@; other blocks are left untouched.
fixBlockIRs :: String -> Block -> Block
fixBlockIRs s block =
  case block of
    Div as b          -> Div (fixAttrs s as) b
    Header i as b     -> Header i (fixAttrs s as) b
    CodeBlock as code -> CodeBlock (fixAttrs s as) code
    _                 -> block

-- | Normalise an attribute triple: the identifier is qualified with the
-- file name via 'addHash', empty class names are dropped, and key-value
-- pairs rejected by 'removeEPUBAttrs' are removed.
fixAttrs :: FilePath -> B.Attr -> B.Attr
fixAttrs s (ident, cs, kvs) =
  ( addHash s ident
  , filter (not . T.null) cs
  , removeEPUBAttrs kvs
  )

-- | Qualify an identifier with a file name, producing @filename#ident@.
-- Only the file-name part of the path is used.  An empty identifier stays
-- empty, so elements without an id do not acquire one.
addHash :: FilePath -> Text -> Text
addHash s ident
  | T.null ident = ident
  | otherwise    = T.pack (takeFileName s) <> T.cons '#' ident

-- | Drop every key-value pair for which 'isEPUBAttr' holds, keeping the
-- remaining pairs in their original order.
removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs kvs = [kv | kv <- kvs, not (isEPUBAttr kv)]

-- | True when the key of a key-value pair starts with @epub:@.  The value
-- is not inspected.
isEPUBAttr :: (Text, a) -> Bool
isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k

-- Library

-- | Strict version of 'Control.Monad.foldM': after every step the new
-- accumulator is fully evaluated with 'deepseq' before the fold continues
-- with the remaining elements.
foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
foldM' step = go
  where
    go acc []       = return acc
    go acc (x:rest) = do
      acc' <- step acc x
      -- force the accumulator before recursing
      acc' `deepseq` go acc' rest

-- | Apply a curried three-argument function to the components of a triple.
uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 f (x, y, z) = f x y z

-- Utility

-- | The first field of a 'QName' (its name), discarding the other two
-- fields.
stripNamespace :: QName -> Text
stripNamespace (QName local _ _) = local

-- | For an attribute whose name is @xmlns@, yield the pair of @"xmlns"@
-- and the attribute's value; any other attribute yields 'Nothing'.
attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
attrToNSPair _                              = Nothing

-- | View an XML attribute as a (name, value) pair; only the first field of
-- the attribute's 'QName' is kept.
attrToPair :: Attr -> (Text, Text)
attrToPair (Attr (QName name _ _) val) = (name, val)

-- | Namespace URI that 'dfName' attaches to the element names it builds.
defaultNameSpace :: Maybe Text
defaultNameSpace = Just "http://www.idpf.org/2007/opf"

-- | A 'QName' with the given name, 'defaultNameSpace' as its URI and no
-- prefix.
dfName :: Text -> QName
dfName s = QName s defaultNameSpace Nothing

-- | A 'QName' with the given name and neither a namespace URI nor a
-- prefix; used in this module for attribute lookups.
emptyName :: Text -> QName
emptyName s = QName s Nothing Nothing

-- Convert Maybe/Either-returning lookups into PandocMonad actions that throw

-- | Look up an attribute on an element, throwing 'PandocParseError' when it
-- is absent.  The error names the attribute that was looked for, in the
-- same way 'findElementE' names the missing element.
findAttrE :: PandocMonad m => QName -> Element -> m Text
findAttrE q e =
  mkE ("Unable to find attribute: " <> tshow q) (findAttr q e)

-- | Find an archive entry by path, throwing 'PandocParseError' when there
-- is none.  The path is percent-unescaped and then normalised before the
-- lookup; the error message shows the path in that processed form.
findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise . unEscapeString -> path) a =
  mkE ("No entry on path: " <> T.pack path) (findEntryByPath path a)

-- | Parse the contents of an archive entry as an XML document.  A parse
-- failure is rethrown as 'PandocXMLError' tagged with the entry's path
-- inside the archive.
parseXMLDocE :: PandocMonad m => Entry -> m Element
parseXMLDocE entry =
  case parseXMLElement doc of
    Left err   -> throwError (PandocXMLError fp err)
    Right elt  -> return elt
 where
  doc = UTF8.toTextLazy (fromEntry entry)
  fp  = T.pack (eRelativePath entry)

-- | Find an element by qualified name using 'findElement', throwing
-- 'PandocParseError' (with the name shown in the message) when nothing
-- matches.
findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE e x =
  mkE ("Unable to find element: " <> tshow e) (findElement e x)

-- | Lift a 'Maybe' into the reader monad: 'Just' values are returned and
-- 'Nothing' throws 'PandocParseError' carrying the given message.
mkE :: PandocMonad m => Text -> Maybe a -> m a
mkE s = maybe (throwError (PandocParseError s)) return