{-# OPTIONS_GHC -fno-warn-missing-methods #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Skylighting.Tokenizer (
tokenize
, TokenizerConfig(..)
) where
import Control.Applicative
import Control.Monad
import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.UTF8 as UTF8
import Data.CaseInsensitive (mk)
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe (catMaybes)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Debug.Trace
import Skylighting.Regex
import Skylighting.Types
import Skylighting.Parser (resolveKeywords)
import Data.List.NonEmpty (NonEmpty((:|)), (<|), toList)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
#endif
newtype ContextStack = ContextStack{ ContextStack -> NonEmpty Context
unContextStack :: NonEmpty Context }
deriving (Int -> ContextStack -> ShowS
[ContextStack] -> ShowS
ContextStack -> String
(Int -> ContextStack -> ShowS)
-> (ContextStack -> String)
-> ([ContextStack] -> ShowS)
-> Show ContextStack
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ContextStack] -> ShowS
$cshowList :: [ContextStack] -> ShowS
show :: ContextStack -> String
$cshow :: ContextStack -> String
showsPrec :: Int -> ContextStack -> ShowS
$cshowsPrec :: Int -> ContextStack -> ShowS
Show)
data TokenizerState = TokenizerState{
TokenizerState -> ByteString
input :: ByteString
, TokenizerState -> Bool
endline :: Bool
, TokenizerState -> Char
prevChar :: Char
, TokenizerState -> ContextStack
contextStack :: ContextStack
, TokenizerState -> IntMap ByteString
captures :: IntMap.IntMap ByteString
, TokenizerState -> Int
column :: Int
, TokenizerState -> Bool
lineContinuation :: Bool
, TokenizerState -> Maybe Int
firstNonspaceColumn :: Maybe Int
, TokenizerState -> Map RE Regex
compiledRegexes :: Map.Map RE Regex
}
data TokenizerConfig = TokenizerConfig{
TokenizerConfig -> SyntaxMap
syntaxMap :: SyntaxMap
, TokenizerConfig -> Bool
traceOutput :: Bool
} deriving (Int -> TokenizerConfig -> ShowS
[TokenizerConfig] -> ShowS
TokenizerConfig -> String
(Int -> TokenizerConfig -> ShowS)
-> (TokenizerConfig -> String)
-> ([TokenizerConfig] -> ShowS)
-> Show TokenizerConfig
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TokenizerConfig] -> ShowS
$cshowList :: [TokenizerConfig] -> ShowS
show :: TokenizerConfig -> String
$cshow :: TokenizerConfig -> String
showsPrec :: Int -> TokenizerConfig -> ShowS
$cshowsPrec :: Int -> TokenizerConfig -> ShowS
Show)
data Result e a = Success a
| Failure
| Error e
deriving (a -> Result e b -> Result e a
(a -> b) -> Result e a -> Result e b
(forall a b. (a -> b) -> Result e a -> Result e b)
-> (forall a b. a -> Result e b -> Result e a)
-> Functor (Result e)
forall a b. a -> Result e b -> Result e a
forall a b. (a -> b) -> Result e a -> Result e b
forall e a b. a -> Result e b -> Result e a
forall e a b. (a -> b) -> Result e a -> Result e b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: a -> Result e b -> Result e a
$c<$ :: forall e a b. a -> Result e b -> Result e a
fmap :: (a -> b) -> Result e a -> Result e b
$cfmap :: forall e a b. (a -> b) -> Result e a -> Result e b
Functor)
deriving instance (Show a, Show e) => Show (Result e a)
data TokenizerM a = TM { TokenizerM a
-> TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String a)
runTokenizerM :: TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String a) }
mapsnd :: (a -> b) -> (c, a) -> (c, b)
mapsnd :: (a -> b) -> (c, a) -> (c, b)
mapsnd f :: a -> b
f (x :: c
x, y :: a
y) = (c
x, a -> b
f a
y)
instance Functor TokenizerM where
fmap :: (a -> b) -> TokenizerM a -> TokenizerM b
fmap f :: a -> b
f (TM g :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
g) = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String b))
-> TokenizerM b
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s -> (Result String a -> Result String b)
-> (TokenizerState, Result String a)
-> (TokenizerState, Result String b)
forall a b c. (a -> b) -> (c, a) -> (c, b)
mapsnd ((a -> b) -> Result String a -> Result String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f) (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
g TokenizerConfig
c TokenizerState
s))
instance Applicative TokenizerM where
pure :: a -> TokenizerM a
pure x :: a
x = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\_ s :: TokenizerState
s -> (TokenizerState
s, a -> Result String a
forall e a. a -> Result e a
Success a
x))
(TM f :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String (a -> b))
f) <*> :: TokenizerM (a -> b) -> TokenizerM a -> TokenizerM b
<*> (TM y :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y) = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String b))
-> TokenizerM b
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s ->
case (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String (a -> b))
f TokenizerConfig
c TokenizerState
s) of
(s' :: TokenizerState
s', Failure ) -> (TokenizerState
s', Result String b
forall e a. Result e a
Failure)
(s' :: TokenizerState
s', Error e :: String
e ) -> (TokenizerState
s', String -> Result String b
forall e a. e -> Result e a
Error String
e)
(s' :: TokenizerState
s', Success f' :: a -> b
f') ->
case (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y TokenizerConfig
c TokenizerState
s') of
(s'' :: TokenizerState
s'', Failure ) -> (TokenizerState
s'', Result String b
forall e a. Result e a
Failure)
(s'' :: TokenizerState
s'', Error e' :: String
e' ) -> (TokenizerState
s'', String -> Result String b
forall e a. e -> Result e a
Error String
e')
(s'' :: TokenizerState
s'', Success y' :: a
y') -> (TokenizerState
s'', b -> Result String b
forall e a. a -> Result e a
Success (a -> b
f' a
y')))
instance Monad TokenizerM where
return :: a -> TokenizerM a
return = a -> TokenizerM a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
(TM x :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x) >>= :: TokenizerM a -> (a -> TokenizerM b) -> TokenizerM b
>>= f :: a -> TokenizerM b
f = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String b))
-> TokenizerM b
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x TokenizerConfig
c TokenizerState
s of
(s' :: TokenizerState
s', Failure ) -> (TokenizerState
s', Result String b
forall e a. Result e a
Failure)
(s' :: TokenizerState
s', Error e :: String
e ) -> (TokenizerState
s', String -> Result String b
forall e a. e -> Result e a
Error String
e)
(s' :: TokenizerState
s', Success x' :: a
x') -> TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String b)
g TokenizerConfig
c TokenizerState
s'
where TM g :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String b)
g = a -> TokenizerM b
f a
x')
instance Alternative TokenizerM where
empty :: TokenizerM a
empty = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\_ s :: TokenizerState
s -> (TokenizerState
s, Result String a
forall e a. Result e a
Failure))
<|> :: TokenizerM a -> TokenizerM a -> TokenizerM a
(<|>) (TM x :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x) (TM y :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y) = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x TokenizerConfig
c TokenizerState
s of
(_, Failure ) -> TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y TokenizerConfig
c TokenizerState
s
(s' :: TokenizerState
s', Error e :: String
e ) -> (TokenizerState
s', String -> Result String a
forall e a. e -> Result e a
Error String
e)
(s' :: TokenizerState
s', Success x' :: a
x') -> (TokenizerState
s', a -> Result String a
forall e a. a -> Result e a
Success a
x'))
many :: TokenizerM a -> TokenizerM [a]
many (TM x :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x) = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String [a]))
-> TokenizerM [a]
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s ->
case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x TokenizerConfig
c TokenizerState
s of
(_, Failure ) -> (TokenizerState
s, [a] -> Result String [a]
forall e a. a -> Result e a
Success [])
(s' :: TokenizerState
s', Error e :: String
e ) -> (TokenizerState
s', String -> Result String [a]
forall e a. e -> Result e a
Error String
e)
(s' :: TokenizerState
s', Success x' :: a
x') -> (Result String [a] -> Result String [a])
-> (TokenizerState, Result String [a])
-> (TokenizerState, Result String [a])
forall a b c. (a -> b) -> (c, a) -> (c, b)
mapsnd (([a] -> [a]) -> Result String [a] -> Result String [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a
x'a -> [a] -> [a]
forall a. a -> [a] -> [a]
:)) (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String [a])
g TokenizerConfig
c TokenizerState
s')
where TM g :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String [a])
g = TokenizerM a -> TokenizerM [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x))
some :: TokenizerM a -> TokenizerM [a]
some x :: TokenizerM a
x = (:) (a -> [a] -> [a]) -> TokenizerM a -> TokenizerM ([a] -> [a])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenizerM a
x TokenizerM ([a] -> [a]) -> TokenizerM [a] -> TokenizerM [a]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> TokenizerM a -> TokenizerM [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many TokenizerM a
x
instance MonadPlus TokenizerM where
mzero :: TokenizerM a
mzero = TokenizerM a
forall (f :: * -> *) a. Alternative f => f a
empty
mplus :: TokenizerM a -> TokenizerM a -> TokenizerM a
mplus = TokenizerM a -> TokenizerM a -> TokenizerM a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
(<|>)
instance MonadReader TokenizerConfig TokenizerM where
ask :: TokenizerM TokenizerConfig
ask = (TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String TokenizerConfig))
-> TokenizerM TokenizerConfig
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s -> (TokenizerState
s, TokenizerConfig -> Result String TokenizerConfig
forall e a. a -> Result e a
Success TokenizerConfig
c))
local :: (TokenizerConfig -> TokenizerConfig)
-> TokenizerM a -> TokenizerM a
local f :: TokenizerConfig -> TokenizerConfig
f (TM x :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x) = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s -> TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x (TokenizerConfig -> TokenizerConfig
f TokenizerConfig
c) TokenizerState
s)
instance MonadState TokenizerState TokenizerM where
get :: TokenizerM TokenizerState
get = (TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String TokenizerState))
-> TokenizerM TokenizerState
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\_ s :: TokenizerState
s -> (TokenizerState
s, TokenizerState -> Result String TokenizerState
forall e a. a -> Result e a
Success TokenizerState
s))
put :: TokenizerState -> TokenizerM ()
put x :: TokenizerState
x = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String ()))
-> TokenizerM ()
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\_ _ -> (TokenizerState
x, () -> Result String ()
forall e a. a -> Result e a
Success ()))
instance MonadError String TokenizerM where
throwError :: String -> TokenizerM a
throwError e :: String
e = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\_ s :: TokenizerState
s -> (TokenizerState
s, String -> Result String a
forall e a. e -> Result e a
Error String
e))
catchError :: TokenizerM a -> (String -> TokenizerM a) -> TokenizerM a
catchError (TM x :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x) f :: String -> TokenizerM a
f = (TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
forall a.
(TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a))
-> TokenizerM a
TM (\c :: TokenizerConfig
c s :: TokenizerState
s -> case TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
x TokenizerConfig
c TokenizerState
s of
(_, Error e :: String
e) -> let TM y :: TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y = String -> TokenizerM a
f String
e in TokenizerConfig
-> TokenizerState -> (TokenizerState, Result String a)
y TokenizerConfig
c TokenizerState
s
z :: (TokenizerState, Result String a)
z -> (TokenizerState, Result String a)
z)
tokenize :: TokenizerConfig -> Syntax -> Text -> Either String [SourceLine]
tokenize :: TokenizerConfig -> Syntax -> Text -> Either String [SourceLine]
tokenize config :: TokenizerConfig
config syntax :: Syntax
syntax inp :: Text
inp =
Either String ContextStack
eitherStack Either String ContextStack
-> (ContextStack -> Either String [SourceLine])
-> Either String [SourceLine]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(!ContextStack
stack) ->
case TokenizerM [SourceLine]
-> TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String [SourceLine])
forall a.
TokenizerM a
-> TokenizerConfig
-> TokenizerState
-> (TokenizerState, Result String a)
runTokenizerM TokenizerM [SourceLine]
action
TokenizerConfig
config{ syntaxMap :: SyntaxMap
syntaxMap = (Syntax -> Syntax) -> SyntaxMap -> SyntaxMap
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (SyntaxMap -> Syntax -> Syntax
resolveKeywords (TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config))
(TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config) }
(ContextStack -> TokenizerState
startingState ContextStack
stack) of
(_, Success ls :: [SourceLine]
ls) -> [SourceLine] -> Either String [SourceLine]
forall a b. b -> Either a b
Right [SourceLine]
ls
(_, Error e :: String
e) -> String -> Either String [SourceLine]
forall a b. a -> Either a b
Left String
e
(_, Failure) -> String -> Either String [SourceLine]
forall a b. a -> Either a b
Left "Could not tokenize code"
where
action :: TokenizerM [SourceLine]
action = ((ByteString, Int) -> TokenizerM SourceLine)
-> [(ByteString, Int)] -> TokenizerM [SourceLine]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (ByteString, Int) -> TokenizerM SourceLine
tokenizeLine ([ByteString] -> [Int] -> [(ByteString, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (ByteString -> [ByteString]
BS.lines (Text -> ByteString
encodeUtf8 Text
inp)) [1..])
eitherStack :: Either String ContextStack
eitherStack = case Text -> Syntax -> Maybe Context
lookupContext (Syntax -> Text
sStartingContext Syntax
syntax)
(SyntaxMap -> Syntax -> Syntax
resolveKeywords (TokenizerConfig -> SyntaxMap
syntaxMap TokenizerConfig
config) Syntax
syntax) of
Just c :: Context
c -> ContextStack -> Either String ContextStack
forall a b. b -> Either a b
Right (ContextStack -> Either String ContextStack)
-> ContextStack -> Either String ContextStack
forall a b. (a -> b) -> a -> b
$ NonEmpty Context -> ContextStack
ContextStack (Context
c Context -> [Context] -> NonEmpty Context
forall a. a -> [a] -> NonEmpty a
:| [])
Nothing -> String -> Either String ContextStack
forall a b. a -> Either a b
Left "No starting context specified"
startingState :: ContextStack -> TokenizerState
startingState stack :: ContextStack
stack =
$WTokenizerState :: ByteString
-> Bool
-> Char
-> ContextStack
-> IntMap ByteString
-> Int
-> Bool
-> Maybe Int
-> Map RE Regex
-> TokenizerState
TokenizerState{ input :: ByteString
input = ByteString
BS.empty
, endline :: Bool
endline = Text -> Bool
Text.null Text
inp
, prevChar :: Char
prevChar = '\n'
, contextStack :: ContextStack
contextStack = ContextStack
stack
, captures :: IntMap ByteString
captures = IntMap ByteString
forall a. Monoid a => a
mempty
, column :: Int
column = 0
, lineContinuation :: Bool
lineContinuation = Bool
False
, firstNonspaceColumn :: Maybe Int
firstNonspaceColumn = Maybe Int
forall a. Maybe a
Nothing
, compiledRegexes :: Map RE Regex
compiledRegexes = Map RE Regex
forall k a. Map k a
Map.empty
}
info :: String -> TokenizerM ()
info :: String -> TokenizerM ()
info s :: String
s = do
Bool
tr <- (TokenizerConfig -> Bool) -> TokenizerM Bool
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> Bool
traceOutput
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tr (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ String -> TokenizerM () -> TokenizerM ()
forall a. String -> a -> a
trace String
s (() -> TokenizerM ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
infoContextStack :: TokenizerM ()
infoContextStack :: TokenizerM ()
infoContextStack = do
Bool
tr <- (TokenizerConfig -> Bool) -> TokenizerM Bool
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> Bool
traceOutput
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
tr (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ do
ContextStack stack :: NonEmpty Context
stack <- (TokenizerState -> ContextStack) -> TokenizerM ContextStack
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "CONTEXT STACK " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Text] -> String
forall a. Show a => a -> String
show ((Context -> Text) -> [Context] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Context -> Text
cName ([Context] -> [Text]) -> [Context] -> [Text]
forall a b. (a -> b) -> a -> b
$ NonEmpty Context -> [Context]
forall a. NonEmpty a -> [a]
toList NonEmpty Context
stack)
popContextStack :: TokenizerM ()
popContextStack :: TokenizerM ()
popContextStack = do
ContextStack cs :: NonEmpty Context
cs <- (TokenizerState -> ContextStack) -> TokenizerM ContextStack
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
case NonEmpty Context
cs of
(_ :| []) -> String -> TokenizerM ()
info "WARNING: Tried to pop only element on context stack!"
(_ :| (x :: Context
x:xs :: [Context]
xs)) -> do
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\st :: TokenizerState
st -> TokenizerState
st{ contextStack :: ContextStack
contextStack = NonEmpty Context -> ContextStack
ContextStack (Context
x Context -> [Context] -> NonEmpty Context
forall a. a -> [a] -> NonEmpty a
:| [Context]
xs) })
TokenizerM ()
infoContextStack
pushContextStack :: Context -> TokenizerM ()
pushContextStack :: Context -> TokenizerM ()
pushContextStack cont :: Context
cont = do
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\st :: TokenizerState
st -> TokenizerState
st{ contextStack :: ContextStack
contextStack =
NonEmpty Context -> ContextStack
ContextStack
((Context
cont Context -> NonEmpty Context -> NonEmpty Context
forall a. a -> NonEmpty a -> NonEmpty a
<|) (NonEmpty Context -> NonEmpty Context)
-> (ContextStack -> NonEmpty Context)
-> ContextStack
-> NonEmpty Context
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ContextStack -> NonEmpty Context
unContextStack (ContextStack -> NonEmpty Context)
-> ContextStack -> NonEmpty Context
forall a b. (a -> b) -> a -> b
$ TokenizerState -> ContextStack
contextStack TokenizerState
st) } )
TokenizerM ()
infoContextStack
currentContext :: TokenizerM Context
currentContext :: TokenizerM Context
currentContext = do
ContextStack (c :: Context
c :| _) <- (TokenizerState -> ContextStack) -> TokenizerM ContextStack
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ContextStack
contextStack
Context -> TokenizerM Context
forall (m :: * -> *) a. Monad m => a -> m a
return Context
c
doContextSwitch :: ContextSwitch -> TokenizerM ()
doContextSwitch :: ContextSwitch -> TokenizerM ()
doContextSwitch Pop = TokenizerM ()
popContextStack
doContextSwitch (Push (!Text
syn,!Text
c)) = do
SyntaxMap
syntaxes <- (TokenizerConfig -> SyntaxMap) -> TokenizerM SyntaxMap
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> SyntaxMap
syntaxMap
case Text -> SyntaxMap -> Maybe Syntax
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
syn SyntaxMap
syntaxes Maybe Syntax -> (Syntax -> Maybe Context) -> Maybe Context
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Syntax -> Maybe Context
lookupContext Text
c of
Just !Context
con -> Context -> TokenizerM ()
pushContextStack Context
con
Nothing -> String -> TokenizerM ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "Unknown syntax or context: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (Text, Text) -> String
forall a. Show a => a -> String
show (Text
syn, Text
c)
doContextSwitches :: [ContextSwitch] -> TokenizerM ()
doContextSwitches :: [ContextSwitch] -> TokenizerM ()
doContextSwitches = (ContextSwitch -> TokenizerM ())
-> [ContextSwitch] -> TokenizerM ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ContextSwitch -> TokenizerM ()
doContextSwitch
lookupContext :: Text -> Syntax -> Maybe Context
lookupContext :: Text -> Syntax -> Maybe Context
lookupContext name :: Text
name syntax :: Syntax
syntax | Text -> Bool
Text.null Text
name =
if Text -> Bool
Text.null (Syntax -> Text
sStartingContext Syntax
syntax)
then Maybe Context
forall a. Maybe a
Nothing
else Text -> Syntax -> Maybe Context
lookupContext (Syntax -> Text
sStartingContext Syntax
syntax) Syntax
syntax
lookupContext name :: Text
name syntax :: Syntax
syntax = Text -> Map Text Context -> Maybe Context
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
name (Map Text Context -> Maybe Context)
-> Map Text Context -> Maybe Context
forall a b. (a -> b) -> a -> b
$ Syntax -> Map Text Context
sContexts Syntax
syntax
tokenizeLine :: (ByteString, Int) -> TokenizerM [Token]
tokenizeLine :: (ByteString, Int) -> TokenizerM SourceLine
tokenizeLine (!ByteString
ln, !Int
linenum) = do
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
ln, endline :: Bool
endline = ByteString -> Bool
BS.null ByteString
ln, prevChar :: Char
prevChar = '\n' }
Context
cur <- TokenizerM Context
currentContext
Bool
lineCont <- (TokenizerState -> Bool) -> TokenizerM Bool
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
lineContinuation
if Bool
lineCont
then (TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ lineContinuation :: Bool
lineContinuation = Bool
False }
else do
let !mbFirstNonspace :: Maybe Int
mbFirstNonspace = (Char -> Bool) -> ByteString -> Maybe Int
BS.findIndex (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) (ByteString -> Maybe Int) -> ByteString -> Maybe Int
forall a b. (a -> b) -> a -> b
$! ByteString
ln
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ column :: Int
column = 0
, firstNonspaceColumn :: Maybe Int
firstNonspaceColumn = Maybe Int
mbFirstNonspace }
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineBeginContext Context
cur)
if ByteString -> Bool
BS.null ByteString
ln
then [ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineEmptyContext Context
cur)
else [ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineBeginContext Context
cur)
SourceLine
ts <- SourceLine -> SourceLine
normalizeHighlighting (SourceLine -> SourceLine)
-> ([Maybe Token] -> SourceLine) -> [Maybe Token] -> SourceLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe Token] -> SourceLine
forall a. [Maybe a] -> [a]
catMaybes ([Maybe Token] -> SourceLine)
-> TokenizerM [Maybe Token] -> TokenizerM SourceLine
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenizerM (Maybe Token) -> TokenizerM [Maybe Token]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many TokenizerM (Maybe Token)
getToken
Bool
eol <- (TokenizerState -> Bool) -> TokenizerM Bool
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline
if Bool
eol
then do
TokenizerM Context
currentContext TokenizerM Context -> (Context -> TokenizerM ()) -> TokenizerM ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Context -> TokenizerM ()
checkLineEnd
SourceLine -> TokenizerM SourceLine
forall (m :: * -> *) a. Monad m => a -> m a
return SourceLine
ts
else do
Int
col <- (TokenizerState -> Int) -> TokenizerM Int
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column
String -> TokenizerM SourceLine
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM SourceLine)
-> String -> TokenizerM SourceLine
forall a b. (a -> b) -> a -> b
$ "Could not match anything at line " String -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> String
forall a. Show a => a -> String
show Int
linenum String -> ShowS
forall a. [a] -> [a] -> [a]
++ " column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
col
getToken :: TokenizerM (Maybe Token)
getToken :: TokenizerM (Maybe Token)
getToken = do
ByteString
inp <- (TokenizerState -> ByteString) -> TokenizerM ByteString
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
(TokenizerState -> Bool) -> TokenizerM Bool
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline TokenizerM Bool -> (Bool -> TokenizerM ()) -> TokenizerM ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> (Bool -> Bool) -> Bool -> TokenizerM ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Bool
not
!Context
context <- TokenizerM Context
currentContext
[TokenizerM (Maybe Token)] -> TokenizerM (Maybe Token)
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ((Rule -> TokenizerM (Maybe Token))
-> [Rule] -> [TokenizerM (Maybe Token)]
forall a b. (a -> b) -> [a] -> [b]
map (\r :: Rule
r -> Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule Rule
r ByteString
inp) (Context -> [Rule]
cRules Context
context)) TokenizerM (Maybe Token)
-> TokenizerM (Maybe Token) -> TokenizerM (Maybe Token)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
case Context -> [ContextSwitch]
cFallthroughContext Context
context of
[] | Context -> Bool
cFallthrough Context
context -> Maybe Token
forall a. Maybe a
Nothing Maybe Token -> TokenizerM () -> TokenizerM (Maybe Token)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ [ContextSwitch] -> TokenizerM ()
doContextSwitches [ContextSwitch
Pop]
| Bool
otherwise -> do
Text
t <- TokenizerM Text
normalChunk
let mbtok :: Maybe Token
mbtok = Token -> Maybe Token
forall a. a -> Maybe a
Just (Context -> TokenType
cAttribute Context
context, Text
t)
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "FALLTHROUGH " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Maybe Token -> String
forall a. Show a => a -> String
show Maybe Token
mbtok
Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
mbtok
cs :: [ContextSwitch]
cs -> Maybe Token
forall a. Maybe a
Nothing Maybe Token -> TokenizerM () -> TokenizerM (Maybe Token)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ [ContextSwitch] -> TokenizerM ()
doContextSwitches [ContextSwitch]
cs
takeChars :: Int -> TokenizerM Text
takeChars :: Int -> TokenizerM Text
takeChars 0 = TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
takeChars numchars :: Int
numchars = do
ByteString
inp <- (TokenizerState -> ByteString) -> TokenizerM ByteString
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
let (bs :: ByteString
bs,rest :: ByteString
rest) = Int -> ByteString -> (ByteString, ByteString)
UTF8.splitAt Int
numchars ByteString
inp
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
bs)
!Text
t <- ByteString -> TokenizerM Text
decodeBS ByteString
bs
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
rest,
endline :: Bool
endline = ByteString -> Bool
BS.null ByteString
rest,
prevChar :: Char
prevChar = Text -> Char
Text.last Text
t,
column :: Int
column = TokenizerState -> Int
column TokenizerState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
numchars }
Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
tryRule :: Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule :: Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule _ "" = TokenizerM (Maybe Token)
forall (m :: * -> *) a. MonadPlus m => m a
mzero
tryRule rule :: Rule
rule inp :: ByteString
inp = do
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "Trying rule " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Rule -> String
forall a. Show a => a -> String
show Rule
rule
case Rule -> Maybe Int
rColumn Rule
rule of
Nothing -> () -> TokenizerM ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just n :: Int
n -> (TokenizerState -> Int) -> TokenizerM Int
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column TokenizerM Int -> (Int -> TokenizerM ()) -> TokenizerM ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> (Int -> Bool) -> Int -> TokenizerM ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n)
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Rule -> Bool
rFirstNonspace Rule
rule) (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ do
!Maybe Int
firstNonspace <- (TokenizerState -> Maybe Int) -> TokenizerM (Maybe Int)
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Maybe Int
firstNonspaceColumn
!Int
col <- (TokenizerState -> Int) -> TokenizerM Int
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Int
column
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Maybe Int
firstNonspace Maybe Int -> Maybe Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Maybe Int
forall a. a -> Maybe a
Just Int
col)
Maybe TokenizerState
oldstate <- if Rule -> Bool
rLookahead Rule
rule
then TokenizerState -> Maybe TokenizerState
forall a. a -> Maybe a
Just (TokenizerState -> Maybe TokenizerState)
-> TokenizerM TokenizerState -> TokenizerM (Maybe TokenizerState)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenizerM TokenizerState
forall s (m :: * -> *). MonadState s m => m s
get
else Maybe TokenizerState -> TokenizerM (Maybe TokenizerState)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe TokenizerState
forall a. Maybe a
Nothing
let attr :: TokenType
attr = Rule -> TokenType
rAttribute Rule
rule
Maybe Token
mbtok <- case Rule -> Matcher
rMatcher Rule
rule of
DetectChar c :: Char
c -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Bool -> Char -> ByteString -> TokenizerM Text
detectChar (Rule -> Bool
rDynamic Rule
rule) Char
c ByteString
inp
Detect2Chars c :: Char
c d :: Char
d -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$
Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars (Rule -> Bool
rDynamic Rule
rule) Char
c Char
d ByteString
inp
AnyChar cs :: Set Char
cs -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Set Char -> ByteString -> TokenizerM Text
anyChar Set Char
cs ByteString
inp
RangeDetect c :: Char
c d :: Char
d -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Char -> Char -> ByteString -> TokenizerM Text
rangeDetect Char
c Char
d ByteString
inp
RegExpr re :: RE
re -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Bool -> RE -> ByteString -> TokenizerM Text
regExpr (Rule -> Bool
rDynamic Rule
rule) RE
re ByteString
inp
Int -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseInt ByteString
inp
HlCOct -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseOct ByteString
inp
HlCHex -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseHex ByteString
inp
HlCStringChar -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseCStringChar ByteString
inp
HlCChar -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseCChar ByteString
inp
Float -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
parseFloat ByteString
inp
Keyword _kwattr :: KeywordAttr
_kwattr (Left listname :: Text
listname) ->
String -> TokenizerM (Maybe Token)
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM (Maybe Token))
-> String -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ "Keyword with unresolved list " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
forall a. Show a => a -> String
show Text
listname
Keyword kwattr :: KeywordAttr
kwattr (Right kws :: WordSet Text
kws) ->
TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword KeywordAttr
kwattr WordSet Text
kws ByteString
inp
StringDetect s :: Text
s -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$
Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect (Rule -> Bool
rDynamic Rule
rule) (Rule -> Bool
rCaseSensitive Rule
rule)
Text
s ByteString
inp
WordDetect s :: Text
s -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$
Bool -> Text -> ByteString -> TokenizerM Text
wordDetect (Rule -> Bool
rCaseSensitive Rule
rule) Text
s ByteString
inp
LineContinue -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
lineContinue ByteString
inp
DetectSpaces -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
detectSpaces ByteString
inp
DetectIdentifier -> TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr TokenType
attr (TokenizerM Text -> TokenizerM (Maybe Token))
-> TokenizerM Text -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM Text
detectIdentifier ByteString
inp
IncludeRules cname :: (Text, Text)
cname -> Maybe TokenType
-> (Text, Text) -> ByteString -> TokenizerM (Maybe Token)
includeRules
(if Rule -> Bool
rIncludeAttribute Rule
rule then TokenType -> Maybe TokenType
forall a. a -> Maybe a
Just TokenType
attr else Maybe TokenType
forall a. Maybe a
Nothing)
(Text, Text)
cname ByteString
inp
Maybe Token
mbchildren <- do
ByteString
inp' <- (TokenizerState -> ByteString) -> TokenizerM ByteString
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
[TokenizerM (Maybe Token)] -> TokenizerM (Maybe Token)
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ((Rule -> TokenizerM (Maybe Token))
-> [Rule] -> [TokenizerM (Maybe Token)]
forall a b. (a -> b) -> [a] -> [b]
map (\r :: Rule
r -> Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule Rule
r ByteString
inp') (Rule -> [Rule]
rChildren Rule
rule)) TokenizerM (Maybe Token)
-> TokenizerM (Maybe Token) -> TokenizerM (Maybe Token)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
forall a. Maybe a
Nothing
Maybe Token
mbtok' <- case Maybe Token
mbtok of
Nothing -> Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
forall a. Maybe a
Nothing
Just (tt :: TokenType
tt, s :: Text
s)
| Rule -> Bool
rLookahead Rule
rule -> do
(oldinput :: ByteString
oldinput, oldendline :: Bool
oldendline, oldprevChar :: Char
oldprevChar, oldColumn :: Int
oldColumn) <-
case Maybe TokenizerState
oldstate of
Nothing -> String -> TokenizerM (ByteString, Bool, Char, Int)
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
"oldstate not saved with lookahead rule"
Just st :: TokenizerState
st -> (ByteString, Bool, Char, Int)
-> TokenizerM (ByteString, Bool, Char, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return
(TokenizerState -> ByteString
input TokenizerState
st, TokenizerState -> Bool
endline TokenizerState
st,
TokenizerState -> Char
prevChar TokenizerState
st, TokenizerState -> Int
column TokenizerState
st)
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ input :: ByteString
input = ByteString
oldinput
, endline :: Bool
endline = Bool
oldendline
, prevChar :: Char
prevChar = Char
oldprevChar
, column :: Int
column = Int
oldColumn }
Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
forall a. Maybe a
Nothing
| Bool
otherwise -> do
case Maybe Token
mbchildren of
Nothing -> Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Token -> TokenizerM (Maybe Token))
-> Maybe Token -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Maybe Token
forall a. a -> Maybe a
Just (TokenType
tt, Text
s)
Just (_, cresult :: Text
cresult) -> Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Token -> TokenizerM (Maybe Token))
-> Maybe Token -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Maybe Token
forall a. a -> Maybe a
Just (TokenType
tt, Text
s Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
cresult)
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=' ') (Matcher -> String
forall a. Show a => a -> String
show (Rule -> Matcher
rMatcher Rule
rule)) String -> ShowS
forall a. [a] -> [a] -> [a]
++ " MATCHED " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Maybe Token -> String
forall a. Show a => a -> String
show Maybe Token
mbtok'
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Rule -> [ContextSwitch]
rContextSwitch Rule
rule)
Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
mbtok'
withAttr :: TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr :: TokenType -> TokenizerM Text -> TokenizerM (Maybe Token)
withAttr tt :: TokenType
tt p :: TokenizerM Text
p = do
Text
res <- TokenizerM Text
p
if Text -> Bool
Text.null Text
res
then Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Token
forall a. Maybe a
Nothing
else Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Token -> TokenizerM (Maybe Token))
-> Maybe Token -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Maybe Token
forall a. a -> Maybe a
Just (TokenType
tt, Text
res)
wordDetect :: Bool -> Text -> ByteString -> TokenizerM Text
wordDetect :: Bool -> Text -> ByteString -> TokenizerM Text
wordDetect caseSensitive :: Bool
caseSensitive s :: Text
s inp :: ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
Text
t <- ByteString -> TokenizerM Text
decodeBS (ByteString -> TokenizerM Text) -> ByteString -> TokenizerM Text
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
UTF8.take (Text -> Int
Text.length Text
s) ByteString
inp
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ if Bool
caseSensitive
then Text
s Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
t
else Text -> CI Text
forall s. FoldCase s => s -> CI s
mk Text
s CI Text -> CI Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> CI Text
forall s. FoldCase s => s -> CI s
mk Text
t
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Text -> Bool
Text.null Text
t)
let c :: Char
c = Text -> Char
Text.last Text
t
let rest :: ByteString
rest = Int -> ByteString -> ByteString
UTF8.drop (Text -> Int
Text.length Text
s) ByteString
inp
let d :: Char
d = case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
rest of
Nothing -> '\n'
Just (x :: Char
x,_) -> Char
x
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary Char
c Char
d
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
t)
stringDetect :: Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect :: Bool -> Bool -> Text -> ByteString -> TokenizerM Text
stringDetect dynamic :: Bool
dynamic caseSensitive :: Bool
caseSensitive s :: Text
s inp :: ByteString
inp = do
Text
s' <- if Bool
dynamic
then do
Text
dynStr <- Text -> TokenizerM Text
subDynamicText Text
s
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "Dynamic string: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show Text
dynStr
Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
dynStr
else Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
s
Text
t <- ByteString -> TokenizerM Text
decodeBS (ByteString -> TokenizerM Text) -> ByteString -> TokenizerM Text
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
UTF8.take (Text -> Int
Text.length Text
s') ByteString
inp
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ if Bool
caseSensitive
then Text
s' Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
t
else Text -> CI Text
forall s. FoldCase s => s -> CI s
mk Text
s' CI Text -> CI Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> CI Text
forall s. FoldCase s => s -> CI s
mk Text
t
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
s')
subDynamicText :: Text -> TokenizerM Text
subDynamicText :: Text -> TokenizerM Text
subDynamicText t :: Text
t = do
let substitute :: Text -> TokenizerM Text
substitute x :: Text
x = case Text -> Maybe (Char, Text)
Text.uncons Text
x of
Just (c :: Char
c, rest :: Text
rest) | Char -> Bool
isDigit Char
c -> let capNum :: Int
capNum = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord '0'
in (Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
rest) (Text -> Text) -> TokenizerM Text -> TokenizerM Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> TokenizerM Text
getCapture Int
capNum
_ -> Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> TokenizerM Text) -> Text -> TokenizerM Text
forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
Text.cons '%' Text
x
case (Char -> Bool) -> Text -> [Text]
Text.split (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '%') Text
t of
[] -> Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
Text.empty
x :: Text
x:rest :: [Text]
rest -> (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) (Text -> Text) -> ([Text] -> Text) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
Text.concat ([Text] -> Text) -> TokenizerM [Text] -> TokenizerM Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Text -> TokenizerM Text) -> [Text] -> TokenizerM [Text]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Text -> TokenizerM Text
substitute [Text]
rest
normalChunk :: TokenizerM Text
normalChunk :: TokenizerM Text
normalChunk = do
ByteString
inp <- (TokenizerState -> ByteString) -> TokenizerM ByteString
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> ByteString
input
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Nothing -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Just (c :: Char
c, _)
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ' ' ->
let bs :: ByteString
bs = (Char -> Bool) -> ByteString -> ByteString
BS.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==' ') ByteString
inp
in Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
bs)
| Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
c ->
let (bs :: ByteString
bs, _) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.span Char -> Bool
isAlphaNum ByteString
inp
in Int -> TokenizerM Text
takeChars (ByteString -> Int
UTF8.length ByteString
bs)
| Bool
otherwise -> Int -> TokenizerM Text
takeChars 1
includeRules :: Maybe TokenType -> ContextName -> ByteString
-> TokenizerM (Maybe Token)
includeRules :: Maybe TokenType
-> (Text, Text) -> ByteString -> TokenizerM (Maybe Token)
includeRules mbattr :: Maybe TokenType
mbattr (syn :: Text
syn, con :: Text
con) inp :: ByteString
inp = do
SyntaxMap
syntaxes <- (TokenizerConfig -> SyntaxMap) -> TokenizerM SyntaxMap
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks TokenizerConfig -> SyntaxMap
syntaxMap
case Text -> SyntaxMap -> Maybe Syntax
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
syn SyntaxMap
syntaxes Maybe Syntax -> (Syntax -> Maybe Context) -> Maybe Context
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Syntax -> Maybe Context
lookupContext Text
con of
Nothing -> do
Context
cur <- TokenizerM Context
currentContext
String -> TokenizerM (Maybe Token)
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM (Maybe Token))
-> String -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ "IncludeRules in " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
Text.unpack (Context -> Text
cSyntax Context
cur) String -> ShowS
forall a. [a] -> [a] -> [a]
++
" requires undefined context " String -> ShowS
forall a. [a] -> [a] -> [a]
++
Text -> String
Text.unpack Text
con String -> ShowS
forall a. [a] -> [a] -> [a]
++ "##" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
Text.unpack Text
syn
Just c :: Context
c -> do
Maybe Token
mbtok <- [TokenizerM (Maybe Token)] -> TokenizerM (Maybe Token)
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ((Rule -> TokenizerM (Maybe Token))
-> [Rule] -> [TokenizerM (Maybe Token)]
forall a b. (a -> b) -> [a] -> [b]
map (\r :: Rule
r -> Rule -> ByteString -> TokenizerM (Maybe Token)
tryRule Rule
r ByteString
inp) (Context -> [Rule]
cRules Context
c))
Maybe Token -> TokenizerM (Maybe Token)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Token -> TokenizerM (Maybe Token))
-> Maybe Token -> TokenizerM (Maybe Token)
forall a b. (a -> b) -> a -> b
$ case (Maybe Token
mbtok, Maybe TokenType
mbattr) of
(Just (NormalTok, xs :: Text
xs), Just attr :: TokenType
attr) -> Token -> Maybe Token
forall a. a -> Maybe a
Just (TokenType
attr, Text
xs)
_ -> Maybe Token
mbtok
checkLineEnd :: Context -> TokenizerM ()
checkLineEnd :: Context -> TokenizerM ()
checkLineEnd c :: Context
c = do
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([ContextSwitch] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Context -> [ContextSwitch]
cLineEndContext Context
c)) (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ do
Bool
eol <- (TokenizerState -> Bool) -> TokenizerM Bool
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
endline
String -> TokenizerM ()
info (String -> TokenizerM ()) -> String -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ "checkLineEnd for " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show (Context -> Text
cName Context
c) String -> ShowS
forall a. [a] -> [a] -> [a]
++ " eol = " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> String
forall a. Show a => a -> String
show Bool
eol String -> ShowS
forall a. [a] -> [a] -> [a]
++ " cLineEndContext = " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [ContextSwitch] -> String
forall a. Show a => a -> String
show (Context -> [ContextSwitch]
cLineEndContext Context
c)
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
eol (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ do
Bool
lineCont' <- (TokenizerState -> Bool) -> TokenizerM Bool
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Bool
lineContinuation
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
lineCont' (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ do
[ContextSwitch] -> TokenizerM ()
doContextSwitches (Context -> [ContextSwitch]
cLineEndContext Context
c)
Context
c' <- TokenizerM Context
currentContext
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Context
c Context -> Context -> Bool
forall a. Eq a => a -> a -> Bool
== Context
c') (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Context -> TokenizerM ()
checkLineEnd Context
c'
detectChar :: Bool -> Char -> ByteString -> TokenizerM Text
detectChar :: Bool -> Char -> ByteString -> TokenizerM Text
detectChar dynamic :: Bool
dynamic c :: Char
c inp :: ByteString
inp = do
Char
c' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9'
then Char -> TokenizerM Char
getDynamicChar Char
c
else Char -> TokenizerM Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (x :: Char
x,_) | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c' -> Int -> TokenizerM Text
takeChars 1
_ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
getDynamicChar :: Char -> TokenizerM Char
getDynamicChar :: Char -> TokenizerM Char
getDynamicChar c :: Char
c = do
let capNum :: Int
capNum = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord '0'
Text
res <- Int -> TokenizerM Text
getCapture Int
capNum
case Text -> Maybe (Char, Text)
Text.uncons Text
res of
Nothing -> TokenizerM Char
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Just (d :: Char
d,_) -> Char -> TokenizerM Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
d
detect2Chars :: Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars :: Bool -> Char -> Char -> ByteString -> TokenizerM Text
detect2Chars dynamic :: Bool
dynamic c :: Char
c d :: Char
d inp :: ByteString
inp = do
Char
c' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9'
then Char -> TokenizerM Char
getDynamicChar Char
c
else Char -> TokenizerM Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
Char
d' <- if Bool
dynamic Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9'
then Char -> TokenizerM Char
getDynamicChar Char
d
else Char -> TokenizerM Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
d
if (Text -> ByteString
encodeUtf8 (String -> Text
Text.pack [Char
c',Char
d'])) ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
inp
then Int -> TokenizerM Text
takeChars 2
else TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
rangeDetect :: Char -> Char -> ByteString -> TokenizerM Text
rangeDetect :: Char -> Char -> ByteString -> TokenizerM Text
rangeDetect c :: Char
c d :: Char
d inp :: ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (x :: Char
x, rest :: ByteString
rest)
| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c -> case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
d) ByteString
rest of
(in_t :: ByteString
in_t, out_t :: ByteString
out_t)
| ByteString -> Bool
BS.null ByteString
out_t -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
| Bool
otherwise -> do
Text
t <- ByteString -> TokenizerM Text
decodeBS ByteString
in_t
Int -> TokenizerM Text
takeChars (Text -> Int
Text.length Text
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2)
_ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
detectSpaces :: ByteString -> TokenizerM Text
detectSpaces :: ByteString -> TokenizerM Text
detectSpaces inp :: ByteString
inp = do
case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.span (\c :: Char
c -> Char -> Bool
isSpace Char
c) ByteString
inp of
(t :: ByteString
t, _)
| ByteString -> Bool
BS.null ByteString
t -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
| Bool
otherwise -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
t)
detectIdentifier :: ByteString -> TokenizerM Text
detectIdentifier :: ByteString -> TokenizerM Text
detectIdentifier inp :: ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
inp of
Just (c :: Char
c, t :: ByteString
t) | (Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLetter Char
c) Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '_' ->
Int -> TokenizerM Text
takeChars (Int -> TokenizerM Text) -> Int -> TokenizerM Text
forall a b. (a -> b) -> a -> b
$ 1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> (Int -> Int) -> Maybe Int -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> Int
BS.length ByteString
t) Int -> Int
forall a. a -> a
id
((Char -> Bool) -> ByteString -> Maybe Int
BS.findIndex (\d :: Char
d -> Bool -> Bool
not (Char -> Bool
isAscii Char
d) Bool -> Bool -> Bool
||
Bool -> Bool
not (Char -> Bool
isAlphaNum Char
d Bool -> Bool -> Bool
|| Char
d Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '_')) ByteString
t)
_ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
lineContinue :: ByteString -> TokenizerM Text
lineContinue :: ByteString -> TokenizerM Text
lineContinue inp :: ByteString
inp = do
if ByteString
inp ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== "\\"
then do
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ lineContinuation :: Bool
lineContinuation = Bool
True }
Int -> TokenizerM Text
takeChars 1
else TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
anyChar :: Set.Set Char -> ByteString -> TokenizerM Text
anyChar :: Set Char -> ByteString -> TokenizerM Text
anyChar cs :: Set Char
cs inp :: ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Just (x :: Char
x, _) | Char
x Char -> Set Char -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Char
cs -> Int -> TokenizerM Text
takeChars 1
_ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
regExpr :: Bool -> RE -> ByteString -> TokenizerM Text
regExpr :: Bool -> RE -> ByteString -> TokenizerM Text
regExpr dynamic :: Bool
dynamic re :: RE
re inp :: ByteString
inp = do
let reStr :: ByteString
reStr = RE -> ByteString
reString RE
re
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int -> ByteString -> ByteString
BS.take 2 ByteString
reStr ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== "\\b") (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ ByteString -> TokenizerM ()
wordBoundary ByteString
inp
Map RE Regex
compiledREs <- (TokenizerState -> Map RE Regex) -> TokenizerM (Map RE Regex)
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Map RE Regex
compiledRegexes
Regex
regex <- case RE -> Map RE Regex -> Maybe Regex
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup RE
re Map RE Regex
compiledREs of
Nothing -> do
Regex
cre <- case Bool -> ByteString -> Either String Regex
compileRegex (RE -> Bool
reCaseSensitive RE
re) ByteString
reStr of
Right r :: Regex
r -> Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return Regex
r
Left e :: String
e -> String -> TokenizerM Regex
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM Regex) -> String -> TokenizerM Regex
forall a b. (a -> b) -> a -> b
$
"Error compiling regex " String -> ShowS
forall a. [a] -> [a] -> [a]
++
ByteString -> String
UTF8.toString ByteString
reStr String -> ShowS
forall a. [a] -> [a] -> [a]
++ ": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ compiledRegexes :: Map RE Regex
compiledRegexes =
RE -> Regex -> Map RE Regex -> Map RE Regex
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert RE
re Regex
cre (TokenizerState -> Map RE Regex
compiledRegexes TokenizerState
st) }
Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return Regex
cre
Just cre :: Regex
cre -> Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return Regex
cre
Regex
regex' <- if Bool
dynamic
then Regex -> TokenizerM Regex
subDynamic Regex
regex
else Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return Regex
regex
case Regex -> ByteString -> Maybe (ByteString, IntMap (Int, Int))
matchRegex Regex
regex' ByteString
inp of
Just (matchedBytes :: ByteString
matchedBytes, capts :: IntMap (Int, Int)
capts) -> do
Bool -> TokenizerM () -> TokenizerM ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (IntMap (Int, Int) -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null IntMap (Int, Int)
capts) (TokenizerM () -> TokenizerM ()) -> TokenizerM () -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$
(TokenizerState -> TokenizerState) -> TokenizerM ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((TokenizerState -> TokenizerState) -> TokenizerM ())
-> (TokenizerState -> TokenizerState) -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ \st :: TokenizerState
st -> TokenizerState
st{ captures :: IntMap ByteString
captures =
((Int, Int) -> ByteString)
-> IntMap (Int, Int) -> IntMap ByteString
forall a b. (a -> b) -> IntMap a -> IntMap b
IntMap.map (ByteString -> (Int, Int) -> ByteString
toSlice ByteString
inp) IntMap (Int, Int)
capts }
Int -> TokenizerM Text
takeChars (ByteString -> Int
UTF8.length ByteString
matchedBytes)
_ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
toSlice :: ByteString -> (Int, Int) -> ByteString
toSlice :: ByteString -> (Int, Int) -> ByteString
toSlice bs :: ByteString
bs (off :: Int
off, len :: Int
len) = Int -> ByteString -> ByteString
BS.take Int
len (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop Int
off ByteString
bs
wordBoundary :: ByteString -> TokenizerM ()
wordBoundary :: ByteString -> TokenizerM ()
wordBoundary inp :: ByteString
inp = do
case ByteString -> Maybe (Char, ByteString)
UTF8.uncons ByteString
inp of
Nothing -> () -> TokenizerM ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just (d :: Char
d, _) -> do
Char
c <- (TokenizerState -> Char) -> TokenizerM Char
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Char
prevChar
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary Char
c Char
d
isWordBoundary :: Char -> Char -> Bool
isWordBoundary :: Char -> Char -> Bool
isWordBoundary c :: Char
c d :: Char
d = Char -> Bool
isWordChar Char
c Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
/= Char -> Bool
isWordChar Char
d
decodeBS :: ByteString -> TokenizerM Text
decodeBS :: ByteString -> TokenizerM Text
decodeBS bs :: ByteString
bs = case ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
bs of
Left _ -> String -> TokenizerM Text
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError ("ByteString " String -> ShowS
forall a. [a] -> [a] -> [a]
++
ByteString -> String
forall a. Show a => a -> String
show ByteString
bs String -> ShowS
forall a. [a] -> [a] -> [a]
++ "is not UTF8")
Right t :: Text
t -> Text -> TokenizerM Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
subDynamic :: Regex -> TokenizerM Regex
subDynamic :: Regex -> TokenizerM Regex
subDynamic (MatchDynamic capNum :: Int
capNum) = do
Text
replacement <- Int -> TokenizerM Text
getCapture Int
capNum
Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (Regex -> TokenizerM Regex) -> Regex -> TokenizerM Regex
forall a b. (a -> b) -> a -> b
$ [Regex] -> Regex
forall a. Monoid a => [a] -> a
mconcat ([Regex] -> Regex) -> [Regex] -> Regex
forall a b. (a -> b) -> a -> b
$ (Char -> Regex) -> String -> [Regex]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Bool) -> Regex
MatchChar ((Char -> Bool) -> Regex)
-> (Char -> Char -> Bool) -> Char -> Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
(==)) (String -> [Regex]) -> String -> [Regex]
forall a b. (a -> b) -> a -> b
$ Text -> String
Text.unpack Text
replacement
subDynamic (MatchAlt r1 :: Regex
r1 r2 :: Regex
r2) =
Regex -> Regex -> Regex
MatchAlt (Regex -> Regex -> Regex)
-> TokenizerM Regex -> TokenizerM (Regex -> Regex)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r1 TokenizerM (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Regex -> TokenizerM Regex
subDynamic Regex
r2
subDynamic (MatchConcat r1 :: Regex
r1 r2 :: Regex
r2) =
Regex -> Regex -> Regex
MatchConcat (Regex -> Regex -> Regex)
-> TokenizerM Regex -> TokenizerM (Regex -> Regex)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r1 TokenizerM (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Regex -> TokenizerM Regex
subDynamic Regex
r2
subDynamic (MatchSome r :: Regex
r) =
Regex -> Regex
MatchSome (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (MatchCapture i :: Int
i r :: Regex
r) =
Int -> Regex -> Regex
MatchCapture Int
i (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (AssertPositive dir :: Direction
dir r :: Regex
r) =
Direction -> Regex -> Regex
AssertPositive Direction
dir (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic (AssertNegative dir :: Direction
dir r :: Regex
r) =
Direction -> Regex -> Regex
AssertNegative Direction
dir (Regex -> Regex) -> TokenizerM Regex -> TokenizerM Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> TokenizerM Regex
subDynamic Regex
r
subDynamic x :: Regex
x = Regex -> TokenizerM Regex
forall (m :: * -> *) a. Monad m => a -> m a
return Regex
x
getCapture :: Int -> TokenizerM Text
getCapture :: Int -> TokenizerM Text
getCapture capnum :: Int
capnum = do
IntMap ByteString
capts <- (TokenizerState -> IntMap ByteString)
-> TokenizerM (IntMap ByteString)
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> IntMap ByteString
captures
case Int -> IntMap ByteString -> Maybe ByteString
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
capnum IntMap ByteString
capts of
Nothing -> String -> TokenizerM Text
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> TokenizerM Text) -> String -> TokenizerM Text
forall a b. (a -> b) -> a -> b
$ "Capture " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
capnum String -> ShowS
forall a. Semigroup a => a -> a -> a
<> " not defined!"
Just x :: ByteString
x -> ByteString -> TokenizerM Text
decodeBS ByteString
x
keyword :: KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword :: KeywordAttr -> WordSet Text -> ByteString -> TokenizerM Text
keyword kwattr :: KeywordAttr
kwattr kws :: WordSet Text
kws inp :: ByteString
inp = do
Char
prev <- (TokenizerState -> Char) -> TokenizerM Char
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets TokenizerState -> Char
prevChar
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Char
prev Char -> Set Char -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` (KeywordAttr -> Set Char
keywordDelims KeywordAttr
kwattr)
let (w :: ByteString
w,_) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
UTF8.break (Char -> Set Char -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` (KeywordAttr -> Set Char
keywordDelims KeywordAttr
kwattr)) ByteString
inp
Bool -> TokenizerM ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TokenizerM ()) -> Bool -> TokenizerM ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
w)
Text
w' <- ByteString -> TokenizerM Text
decodeBS ByteString
w
let numchars :: Int
numchars = Text -> Int
Text.length Text
w'
if Text
w' Text -> WordSet Text -> Bool
forall a. (FoldCase a, Ord a) => a -> WordSet a -> Bool
`inWordSet` WordSet Text
kws
then Int -> TokenizerM Text
takeChars Int
numchars
else TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
normalizeHighlighting :: [Token] -> [Token]
normalizeHighlighting :: SourceLine -> SourceLine
normalizeHighlighting [] = []
normalizeHighlighting ((!TokenType
t,!Text
x):xs :: SourceLine
xs)
| Text -> Bool
Text.null Text
x = SourceLine -> SourceLine
normalizeHighlighting SourceLine
xs
| Bool
otherwise =
(TokenType
t, Text
matchedText) Token -> SourceLine -> SourceLine
forall a. a -> [a] -> [a]
: SourceLine -> SourceLine
normalizeHighlighting SourceLine
rest
where (matches :: SourceLine
matches, rest :: SourceLine
rest) = (Token -> Bool) -> SourceLine -> (SourceLine, SourceLine)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (\(z :: TokenType
z,_) -> TokenType
z TokenType -> TokenType -> Bool
forall a. Eq a => a -> a -> Bool
== TokenType
t) SourceLine
xs
!matchedText :: Text
matchedText = [Text] -> Text
Text.concat (Text
x Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: (Token -> Text) -> SourceLine -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Token -> Text
forall a b. (a, b) -> b
snd SourceLine
matches)
parseCStringChar :: ByteString -> TokenizerM Text
parseCStringChar :: ByteString -> TokenizerM Text
parseCStringChar inp :: ByteString
inp = do
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pCStringChar) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pCStringChar :: A.Parser ()
pCStringChar :: Parser ()
pCStringChar = do
Char
_ <- Char -> Parser Char
A.char '\\'
Char
next <- Parser Char
A.anyChar
case Char
next of
c :: Char
c | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'x' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'X' -> () () -> Parser ByteString ByteString -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 (String -> Char -> Bool
A.inClass "0-9a-fA-F")
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '0' -> () () -> Parser ByteString ByteString -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile (String -> Char -> Bool
A.inClass "0-7")
| String -> Char -> Bool
A.inClass "abefnrtv\"'?\\" Char
c -> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
| Bool
otherwise -> Parser ()
forall (m :: * -> *) a. MonadPlus m => m a
mzero
parseCChar :: ByteString -> TokenizerM Text
parseCChar :: ByteString -> TokenizerM Text
parseCChar inp :: ByteString
inp = do
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pCChar) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pCChar :: A.Parser ()
pCChar :: Parser ()
pCChar = do
() () -> Parser Char -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char '\''
Parser ()
pCStringChar Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () () -> Parser Char -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\'' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\\')
() () -> Parser Char -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char '\''
parseInt :: ByteString -> TokenizerM Text
parseInt :: ByteString -> TokenizerM Text
parseInt inp :: ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match (Parser ()
pHex Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
pOct Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
pDec)) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pDec :: A.Parser ()
pDec :: Parser ()
pDec = do
Parser ()
mbMinus
ByteString
_ <- (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 (String -> Char -> Bool
A.inClass "0-9")
Parser ()
guardWordBoundary
parseOct :: ByteString -> TokenizerM Text
parseOct :: ByteString -> TokenizerM Text
parseOct inp :: ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pHex) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pOct :: A.Parser ()
pOct :: Parser ()
pOct = do
Parser ()
mbMinus
Char
_ <- Char -> Parser Char
A.char '0'
Char
_ <- (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass "Oo")
ByteString
_ <- (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 (String -> Char -> Bool
A.inClass "0-7")
Parser ()
guardWordBoundary
parseHex :: ByteString -> TokenizerM Text
parseHex :: ByteString -> TokenizerM Text
parseHex inp :: ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pHex) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
pHex :: A.Parser ()
pHex :: Parser ()
pHex = do
Parser ()
mbMinus
Char
_ <- Char -> Parser Char
A.char '0'
Char
_ <- (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass "Xx")
ByteString
_ <- (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 (String -> Char -> Bool
A.inClass "0-9a-fA-F")
Parser ()
guardWordBoundary
guardWordBoundary :: A.Parser ()
guardWordBoundary :: Parser ()
guardWordBoundary = do
Maybe Char
mbw <- Parser (Maybe Char)
A.peekChar
case Maybe Char
mbw of
Just c :: Char
c -> Bool -> Parser ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Parser ()) -> Bool -> Parser ()
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isWordBoundary '0' Char
c
Nothing -> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
mbMinus :: A.Parser ()
mbMinus :: Parser ()
mbMinus = (() () -> Parser Char -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
A.char '-') Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
mbPlusMinus :: A.Parser ()
mbPlusMinus :: Parser ()
mbPlusMinus = () () -> Parser Char -> Parser ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass "+-") Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
parseFloat :: ByteString -> TokenizerM Text
parseFloat :: ByteString -> TokenizerM Text
parseFloat inp :: ByteString
inp = do
ByteString -> TokenizerM ()
wordBoundary ByteString
inp
case Parser (ByteString, ())
-> ByteString -> Either String (ByteString, ())
forall a. Parser a -> ByteString -> Either String a
A.parseOnly (Parser () -> Parser (ByteString, ())
forall a. Parser a -> Parser (ByteString, a)
A.match Parser ()
pFloat) ByteString
inp of
Left _ -> TokenizerM Text
forall (m :: * -> *) a. MonadPlus m => m a
mzero
Right (r :: ByteString
r,_) -> Int -> TokenizerM Text
takeChars (ByteString -> Int
BS.length ByteString
r)
where pFloat :: A.Parser ()
pFloat :: Parser ()
pFloat = do
let digits :: Parser ByteString ByteString
digits = (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 (String -> Char -> Bool
A.inClass "0-9")
Parser ()
mbPlusMinus
Bool
before <- Bool -> Parser ByteString Bool -> Parser ByteString Bool
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False (Parser ByteString Bool -> Parser ByteString Bool)
-> Parser ByteString Bool -> Parser ByteString Bool
forall a b. (a -> b) -> a -> b
$ Bool
True Bool -> Parser ByteString ByteString -> Parser ByteString Bool
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString ByteString
digits
Bool
dot <- Bool -> Parser ByteString Bool -> Parser ByteString Bool
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False (Parser ByteString Bool -> Parser ByteString Bool)
-> Parser ByteString Bool -> Parser ByteString Bool
forall a b. (a -> b) -> a -> b
$ Bool
True Bool -> Parser Char -> Parser ByteString Bool
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass ".")
Bool
after <- Bool -> Parser ByteString Bool -> Parser ByteString Bool
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False (Parser ByteString Bool -> Parser ByteString Bool)
-> Parser ByteString Bool -> Parser ByteString Bool
forall a b. (a -> b) -> a -> b
$ Bool
True Bool -> Parser ByteString ByteString -> Parser ByteString Bool
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser ByteString ByteString
digits
Bool
e <- Bool -> Parser ByteString Bool -> Parser ByteString Bool
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
A.option Bool
False (Parser ByteString Bool -> Parser ByteString Bool)
-> Parser ByteString Bool -> Parser ByteString Bool
forall a b. (a -> b) -> a -> b
$ Bool
True Bool -> Parser ByteString ByteString -> Parser ByteString Bool
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ((Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass "Ee") Parser Char -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
Parser ()
mbPlusMinus Parser ()
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString ByteString
digits)
Maybe Char
mbnext <- Parser (Maybe Char)
A.peekChar
case Maybe Char
mbnext of
Nothing -> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just c :: Char
c -> Bool -> Parser ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ String -> Char -> Bool
A.inClass "." Char
c)
Bool -> Parser ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Parser ()) -> Bool -> Parser ()
forall a b. (a -> b) -> a -> b
$ (Bool
before Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
dot Bool -> Bool -> Bool
&& Bool
e)
Bool -> Bool -> Bool
|| (Bool
before Bool -> Bool -> Bool
&& Bool
dot Bool -> Bool -> Bool
&& (Bool
after Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
e))
Bool -> Bool -> Bool
|| (Bool -> Bool
not Bool
before Bool -> Bool -> Bool
&& Bool
dot Bool -> Bool -> Bool
&& Bool
after)