mirror of
https://github.com/Drezil/dear-imgui.hs.git
synced 2024-11-22 16:57:00 +00:00
198 lines
6.1 KiB
Haskell
198 lines
6.1 KiB
Haskell
|
{-# LANGUAGE DerivingStrategies #-}
|
||
|
{-# LANGUAGE FlexibleContexts #-}
|
||
|
{-# LANGUAGE FlexibleInstances #-}
|
||
|
{-# LANGUAGE LambdaCase #-}
|
||
|
{-# LANGUAGE OverloadedStrings #-}
|
||
|
{-# LANGUAGE TypeApplications #-}
|
||
|
{-# LANGUAGE ViewPatterns #-}
|
||
|
|
||
|
module DearImGui.Generator.Tokeniser where
|
||
|
|
||
|
-- base
|
||
|
import Control.Arrow
|
||
|
( first, second )
|
||
|
import Control.Applicative
|
||
|
( (<|>), some )
|
||
|
import Data.Char
|
||
|
( isAlpha, isAlphaNum, isDigit, isPunctuation, isSpace, isSymbol, toLower )
|
||
|
import Data.Function
|
||
|
( (&) )
|
||
|
import Data.Functor
|
||
|
( ($>) )
|
||
|
import Data.Monoid
|
||
|
( Sum(..) )
|
||
|
|
||
|
-- megaparsec
|
||
|
import Text.Megaparsec
|
||
|
( MonadParsec, VisualStream(..)
|
||
|
, chunk, parseMaybe, satisfy, try
|
||
|
)
|
||
|
import Text.Megaparsec.Char.Lexer
|
||
|
( hexadecimal, scientific )
|
||
|
|
||
|
-- parser-combinators
|
||
|
import Control.Monad.Combinators
|
||
|
( optional )
|
||
|
|
||
|
-- scientific
|
||
|
import Data.Scientific
|
||
|
( Scientific )
|
||
|
|
||
|
-- text
|
||
|
import Data.Text
|
||
|
( Text )
|
||
|
import qualified Data.Text as Text
|
||
|
( break, breakOn, cons, drop, dropWhile
|
||
|
, head, last, length
|
||
|
, pack, snoc, span, strip, tail, take
|
||
|
, uncons, unpack
|
||
|
)
|
||
|
|
||
|
-- unordered-containers
|
||
|
import Data.HashSet
|
||
|
( HashSet )
|
||
|
import qualified Data.HashSet as HashSet
|
||
|
( fromList, member )
|
||
|
|
||
|
--------------------------------------------------------------------------------
|
||
|
|
||
|
data TokeniserError
|
||
|
= Couldn'tParseNumber { problem :: !Text }
|
||
|
| UnhandledCase { unhandled :: !( Char, Text ) }
|
||
|
deriving stock ( Eq, Ord, Show )
|
||
|
|
||
|
data Tok
|
||
|
= Keyword !Text
|
||
|
| ReservedSymbol !Char
|
||
|
| Symbolic !Text
|
||
|
| Identifier !Text
|
||
|
| Comment !Text
|
||
|
| Char !Char
|
||
|
| String !Text
|
||
|
| Number !Scientific !Text
|
||
|
| BeginCPP !Text
|
||
|
| EndCPPLine
|
||
|
deriving stock ( Show, Eq, Ord )
|
||
|
|
||
|
showToken :: Tok -> String
|
||
|
showToken = \case
|
||
|
Keyword t -> Text.unpack t
|
||
|
ReservedSymbol c -> [c]
|
||
|
Symbolic t -> Text.unpack t
|
||
|
Identifier t -> Text.unpack t
|
||
|
Comment t -> Text.unpack t
|
||
|
Char c -> [c]
|
||
|
String t -> Text.unpack t
|
||
|
Number s t -> show s <> Text.unpack t
|
||
|
BeginCPP t -> "#" <> Text.unpack t
|
||
|
EndCPPLine -> "EndCppLine"
|
||
|
|
||
|
tokenLength :: Tok -> Int
|
||
|
tokenLength = \case
|
||
|
Keyword t -> Text.length t
|
||
|
ReservedSymbol _ -> 1
|
||
|
Symbolic t -> Text.length t
|
||
|
Identifier t -> Text.length t
|
||
|
Comment t -> Text.length t
|
||
|
Char _ -> 1
|
||
|
String t -> Text.length t
|
||
|
Number s t -> length ( show s ) + Text.length t
|
||
|
BeginCPP t -> 1 + Text.length t
|
||
|
EndCPPLine -> length ( "EndCPPLine" :: String )
|
||
|
|
||
|
instance VisualStream [Tok] where
|
||
|
showTokens _ = foldMap showToken
|
||
|
tokensLength _ = getSum . foldMap ( Sum . tokenLength )
|
||
|
|
||
|
keywords :: HashSet Text
|
||
|
keywords = HashSet.fromList
|
||
|
[ "auto", "break", "case", "char", "const", "continue", "default", "do", "double"
|
||
|
, "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int", "long"
|
||
|
, "register", "restrict", "return", "short", "signed", "sizeof", "static", "struct"
|
||
|
, "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
|
||
|
]
|
||
|
|
||
|
reservedSymbols :: HashSet Char
|
||
|
reservedSymbols = HashSet.fromList [ '(', ')', '{', '}', ',', ';', '=', '#' ]
|
||
|
|
||
|
tokenise :: Text -> Either TokeniserError [ Tok ]
|
||
|
tokenise ( Text.uncons -> Just ( c, cs ) )
|
||
|
| isSpace c
|
||
|
= tokenise ( Text.dropWhile isSpace cs )
|
||
|
| isAlpha c || c == '_'
|
||
|
, let
|
||
|
this, rest :: Text
|
||
|
( this, rest ) = first ( c `Text.cons` ) $ Text.span ( \ x -> isAlphaNum x || x == '_' ) cs
|
||
|
= if this `HashSet.member` keywords
|
||
|
then ( Keyword this : ) <$> tokenise rest
|
||
|
else ( Identifier this : ) <$> tokenise rest
|
||
|
| isDigit c
|
||
|
, let
|
||
|
this, rest :: Text
|
||
|
( this, rest ) = continuePastExponent $ first ( c `Text.cons` ) $ Text.span ( \ x -> isAlphaNum x || x == '.' ) cs
|
||
|
= case parseMaybe @() parseNumber this of
|
||
|
Just numTok -> ( numTok : ) <$> tokenise rest
|
||
|
Nothing -> Left ( Couldn'tParseNumber { problem = this } )
|
||
|
| c == '\''
|
||
|
, Just ( '\'', rest ) <- Text.uncons ( Text.drop 1 cs )
|
||
|
= ( Char ( Text.head cs ) : ) <$> tokenise rest
|
||
|
| c == '\"'
|
||
|
, let
|
||
|
this, rest :: Text
|
||
|
( this, rest ) = second Text.tail $ Text.break ( == '"') cs
|
||
|
= ( String this : ) <$> tokenise rest
|
||
|
| c == '#'
|
||
|
, let
|
||
|
directive, line, rest :: Text
|
||
|
( directive, ( line, rest ) )
|
||
|
= cs
|
||
|
& Text.break ( isSpace )
|
||
|
& second ( Text.break ( `elem` [ '\n', '\r' ] ) )
|
||
|
= do
|
||
|
lineTokens <- tokenise line
|
||
|
restTokens <- tokenise rest
|
||
|
pure ( ( BeginCPP directive : lineTokens ) <> ( EndCPPLine : restTokens ) )
|
||
|
| c `HashSet.member` reservedSymbols
|
||
|
= ( ReservedSymbol c : ) <$> tokenise cs
|
||
|
| c == '/'
|
||
|
= case Text.take 1 cs of
|
||
|
"/" ->
|
||
|
let
|
||
|
comm, rest :: Text
|
||
|
( comm, rest ) = first Text.strip $ Text.break ( `elem` [ '\n', '\r' ] ) ( Text.drop 1 cs )
|
||
|
in ( Comment comm : ) <$> tokenise rest
|
||
|
"*" ->
|
||
|
let
|
||
|
comm, rest :: Text
|
||
|
( comm, rest ) = Text.breakOn "*/" ( Text.drop 1 cs )
|
||
|
in ( Comment comm : ) <$> tokenise rest
|
||
|
_ ->
|
||
|
let
|
||
|
this, rest :: Text
|
||
|
( this, rest ) = first ( c `Text.cons` ) $ Text.span ( \ x -> x /= '_' && ( isSymbol x || isPunctuation x ) ) cs
|
||
|
in ( Symbolic this : ) <$> tokenise rest
|
||
|
| isSymbol c || isPunctuation c
|
||
|
, let
|
||
|
this, rest :: Text
|
||
|
( this, rest ) = first ( c `Text.cons` ) $ Text.span ( \ x -> x /= '_' && ( isSymbol x || isPunctuation x ) ) cs
|
||
|
= ( Symbolic this : ) <$> tokenise rest
|
||
|
| otherwise
|
||
|
= Left $ UnhandledCase { unhandled = ( c, cs ) }
|
||
|
tokenise _ = Right []
|
||
|
|
||
|
continuePastExponent :: ( Text, Text ) -> ( Text, Text )
|
||
|
continuePastExponent ( this, rest )
|
||
|
| toLower ( Text.last this ) `elem` [ 'e', 'p' ]
|
||
|
, Just ( r, rs ) <- Text.uncons rest
|
||
|
, r `elem` [ '+', '-' ]
|
||
|
, ( this', rest' ) <- Text.span isAlphaNum rs
|
||
|
= ( this `Text.snoc` r <> this', rest' )
|
||
|
| otherwise
|
||
|
= ( this, rest )
|
||
|
|
||
|
parseNumber :: MonadParsec e Text m => m Tok
|
||
|
parseNumber = try ( chunk "0.f" $> Number 0 "f" ) <|> do
|
||
|
value <- try ( chunk "0x" *> hexadecimal ) <|> scientific
|
||
|
mbSuffix <- fmap ( maybe "" Text.pack ) . optional . some $ satisfy ( \ s -> toLower s `elem` ( "uflz" :: String ) )
|
||
|
pure ( Number value mbSuffix )
|