module UU.Scanner.Scanner where
import Data.Char(isLower, isUpper, isSpace, isAlphaNum, isDigit, chr, ord)
import Data.List(sort)
import Data.Maybe(isJust)
import UU.Util.BinaryTrees(tab2tree,btLocateIn)
import UU.Scanner.Token(Token, EnumValToken(..), valueToken, reserved, errToken)
import UU.Scanner.Position(Pos, initPos, advc, adv)
scanFile :: [String] -> [String] -> String -> String -> FilePath -> IO [Token]
scanFile :: [String] -> [String] -> String -> String -> String -> IO [Token]
scanFile [String]
keywordstxt [String]
keywordsops String
specchars String
opchars String
fn =
do String
txt <- String -> IO String
readFile String
fn
[Token] -> IO [Token]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([String]
-> [String] -> String -> String -> Pos -> String -> [Token]
scan [String]
keywordstxt [String]
keywordsops String
specchars String
opchars (String -> Pos
initPos String
fn) String
txt)
scan :: [String] -> [String] -> String -> String -> Pos -> String -> [Token]
scan :: [String]
-> [String] -> String -> String -> Pos -> String -> [Token]
scan [String]
keywordstxt [String]
keywordsops String
specchars String
opchars Pos
pos String
input
= Pos -> String -> [Token]
doScan Pos
pos String
input
where
locatein :: Ord a => [a] -> a -> Bool
locatein :: forall a. Ord a => [a] -> a -> Bool
locatein [a]
es = Maybe a -> Bool
forall a. Maybe a -> Bool
isJust (Maybe a -> Bool) -> (a -> Maybe a) -> a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> Ordering) -> BinSearchTree a -> a -> Maybe a
forall a b. (a -> b -> Ordering) -> BinSearchTree a -> b -> Maybe a
btLocateIn a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ([a] -> BinSearchTree a
forall av. [av] -> BinSearchTree av
tab2tree ([a] -> [a]
forall a. Ord a => [a] -> [a]
sort [a]
es))
iskw :: String -> Bool
iskw = [String] -> String -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein [String]
keywordstxt
isop :: String -> Bool
isop = [String] -> String -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein [String]
keywordsops
isSymbol :: Char -> Bool
isSymbol = String -> Char -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein String
specchars
isOpsym :: Char -> Bool
isOpsym = String -> Char -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein String
opchars
isIdStart :: Char -> Bool
isIdStart Char
c = Char -> Bool
isLower Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'
isIdChar :: Char -> Bool
isIdChar Char
c = Char -> Bool
isAlphaNum Char
c
Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''
Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'
scanIdent :: Pos -> String -> (String, Pos, String)
scanIdent Pos
p String
s = let (String
name,String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdChar String
s
in (String
name,Int -> Pos -> Pos
advc (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
name) Pos
p,String
rest)
doScan :: Pos -> String -> [Token]
doScan Pos
p [] = []
doScan Pos
p (Char
c:String
s) | Char -> Bool
isSpace Char
c = let (String
sp,String
next) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSpace String
s
in Pos -> String -> [Token]
doScan ((Pos -> Char -> Pos) -> Pos -> String -> Pos
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Pos -> Char -> Pos
adv Pos
p (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
sp)) String
next
doScan Pos
p (Char
'-':Char
'-':String
s) = Pos -> String -> [Token]
doScan Pos
p ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') String
s)
doScan Pos
p (Char
'{':Char
'-':String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc Int
2 Pos
p) String
s
doScan Pos
p (Char
'"':String
ss)
= let (String
s,Int
swidth,String
rest) = String -> (String, Int, String)
scanString String
ss
in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest Bool -> Bool -> Bool
|| String -> Char
forall a. HasCallStack => [a] -> a
head String
rest Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"'
then String -> Pos -> Token
errToken String
"Unterminated string literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc Int
swidth Pos
p) String
rest
else EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkString String
s Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc (Int
swidthInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) Pos
p) (String -> String
forall a. HasCallStack => [a] -> [a]
tail String
rest)
doScan Pos
p (Char
'\'':String
ss)
= let (Maybe Char
mc,Int
cwidth,String
rest) = String -> (Maybe Char, Int, String)
scanChar String
ss
in case Maybe Char
mc of
Maybe Char
Nothing -> String -> Pos -> Token
errToken String
"Error in character literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc Int
cwidth Pos
p) String
rest
Just Char
c -> if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest Bool -> Bool -> Bool
|| String -> Char
forall a. HasCallStack => [a] -> a
head String
rest Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\''
then String -> Pos -> Token
errToken String
"Unterminated character literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc (Int
cwidthInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Pos
p) String
rest
else EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkChar [Char
c] Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc (Int
cwidthInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) Pos
p) (String -> String
forall a. HasCallStack => [a] -> [a]
tail String
rest)
doScan Pos
p cs :: String
cs@(Char
c:String
s)
| Char -> Bool
isSymbol Char
c = String -> Pos -> Token
reserved [Char
c] Pos
p
Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan(Int -> Pos -> Pos
advc Int
1 Pos
p) String
s
| Char -> Bool
isIdStart Char
c Bool -> Bool -> Bool
|| Char -> Bool
isUpper Char
c
= let (String
name', Pos
p', String
s') = Pos -> String -> (String, Pos, String)
scanIdent (Int -> Pos -> Pos
advc Int
1 Pos
p) String
s
name :: String
name = Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
name'
tok :: Token
tok = if String -> Bool
iskw String
name
then String -> Pos -> Token
reserved String
name Pos
p
else if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
name' Bool -> Bool -> Bool
&& Char -> Bool
isSymbol Char
c
then String -> Pos -> Token
reserved [Char
c] Pos
p
else EnumValToken -> String -> Pos -> Token
valueToken (if Char -> Bool
isIdStart Char
c then EnumValToken
TkVarid else EnumValToken
TkConid) String
name Pos
p
in Token
tok Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan Pos
p' String
s'
| Char -> Bool
isOpsym Char
c = let (String
name, String
s') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isOpsym String
cs
tok :: Token
tok | String -> Bool
isop String
name = String -> Pos -> Token
reserved String
name Pos
p
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
':' = EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkConOp String
name Pos
p
| Bool
otherwise = EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkOp String
name Pos
p
in Token
tok Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan ((Pos -> Char -> Pos) -> Pos -> String -> Pos
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Pos -> Char -> Pos
adv Pos
p String
name) String
s'
| Char -> Bool
isDigit Char
c = let (EnumValToken
tktype,String
number,Int
width,String
s') = String -> (EnumValToken, String, Int, String)
getNumber String
cs
in EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
tktype String
number Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Int -> Pos -> Pos
advc Int
width Pos
p) String
s'
| Bool
otherwise = String -> Pos -> Token
errToken (String
"Unexpected character " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c) Pos
p
Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Pos -> Char -> Pos
adv Pos
p Char
c) String
s
lexNest :: (Pos -> String -> [Token])
-> Pos
-> String
-> [Token]
lexNest :: (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest Pos -> String -> [Token]
cont Pos
pos String
inp = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
cont Pos
pos String
inp
where lexNest' :: (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
c Pos
p (Char
'-':Char
'}':String
s) = Pos -> String -> [Token]
c (Int -> Pos -> Pos
advc Int
2 Pos
p) String
s
lexNest' Pos -> String -> [Token]
c Pos
p (Char
'{':Char
'-':String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' ((Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
c) (Int -> Pos -> Pos
advc Int
2 Pos
p) String
s
lexNest' Pos -> String -> [Token]
c Pos
p (Char
x:String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
c (Pos -> Char -> Pos
adv Pos
p Char
x) String
s
lexNest' Pos -> String -> [Token]
_ Pos
_ [] = [ String -> Pos -> Token
errToken String
"Unterminated nested comment" Pos
pos]
scanString :: String -> (String,Int,String)
scanString :: String -> (String, Int, String)
scanString [] = (String
"",Int
0,[])
scanString (Char
'\\':Char
'&':String
xs) = let (String
str,Int
w,String
r) = String -> (String, Int, String)
scanString String
xs
in (String
str,Int
wInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2,String
r)
scanString (Char
'\'':String
xs) = let (String
str,Int
w,String
r) = String -> (String, Int, String)
scanString String
xs
in (Char
'\''Char -> String -> String
forall a. a -> [a] -> [a]
: String
str,Int
wInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1,String
r)
scanString String
xs = let (Maybe Char
ch,Int
cw,String
cr) = String -> (Maybe Char, Int, String)
getchar String
xs
(String
str,Int
w,String
r) = String -> (String, Int, String)
scanString String
cr
str' :: String
str' = String -> (Char -> String) -> Maybe Char -> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
"" (Char -> String -> String
forall a. a -> [a] -> [a]
:String
str) Maybe Char
ch
in (String, Int, String)
-> (Char -> (String, Int, String))
-> Maybe Char
-> (String, Int, String)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (String
"",Int
0,String
xs) (\Char
c -> (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
str,Int
cwInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
w,String
r)) Maybe Char
ch
scanChar :: [Char] -> (Maybe Char,Int,[Char])
scanChar :: String -> (Maybe Char, Int, String)
scanChar (Char
'"' :String
xs) = (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'"',Int
1,String
xs)
scanChar String
xs = String -> (Maybe Char, Int, String)
getchar String
xs
getchar :: [Char] -> (Maybe Char,Int,[Char])
getchar :: String -> (Maybe Char, Int, String)
getchar [] = (Maybe Char
forall a. Maybe a
Nothing,Int
0,[])
getchar s :: String
s@(Char
'\n':String
_ ) = (Maybe Char
forall a. Maybe a
Nothing,Int
0,String
s )
getchar s :: String
s@(Char
'\t':String
_ ) = (Maybe Char
forall a. Maybe a
Nothing,Int
0,String
s)
getchar s :: String
s@(Char
'\'':String
_ ) = (Maybe Char
forall a. Maybe a
Nothing,Int
0,String
s)
getchar s :: String
s@(Char
'\"' :String
_ ) = (Maybe Char
forall a. Maybe a
Nothing,Int
0,String
s)
getchar (Char
'\\':String
xs) = let (Maybe Char
c,Int
l,String
r) = String -> (Maybe Char, Int, String)
getEscChar String
xs
in (Maybe Char
c,Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1,String
r)
getchar (Char
x:String
xs) = (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x,Int
1,String
xs)
getEscChar :: [Char] -> (Maybe Char,Int,[Char])
getEscChar :: String -> (Maybe Char, Int, String)
getEscChar [] = (Maybe Char
forall a. Maybe a
Nothing,Int
0,[])
getEscChar s :: String
s@(Char
x:String
xs) | Char -> Bool
isDigit Char
x = let (EnumValToken
tp,String
n,Int
len,String
rest) = String -> (EnumValToken, String, Int, String)
getNumber String
s
val :: Int
val = case EnumValToken
tp of
EnumValToken
TkInteger8 -> Int -> String -> Int
readn Int
8 String
n
EnumValToken
TkInteger16 -> Int -> String -> Int
readn Int
16 String
n
EnumValToken
TkInteger10 -> Int -> String -> Int
readn Int
10 String
n
in if Int
val Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
val Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
255
then (Char -> Maybe Char
forall a. a -> Maybe a
Just (Int -> Char
chr Int
val),Int
len, String
rest)
else (Maybe Char
forall a. Maybe a
Nothing,Int
1,String
rest)
| Bool
otherwise = case Char
x Char -> [(Char, Char)] -> Maybe Char
forall a b. Eq a => a -> [(a, b)] -> Maybe b
`lookup` [(Char, Char)]
cntrChars of
Maybe Char
Nothing -> (Maybe Char
forall a. Maybe a
Nothing,Int
0,String
s)
Just Char
c -> (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c,Int
1,String
xs)
where cntrChars :: [(Char, Char)]
cntrChars = [(Char
'a',Char
'\a'),(Char
'b',Char
'\b'),(Char
'f',Char
'\f'),(Char
'n',Char
'\n'),(Char
'r',Char
'\r'),(Char
't',Char
'\t')
,(Char
'v',Char
'\v'),(Char
'\\',Char
'\\'),(Char
'\"',Char
'\"'),(Char
'\'',Char
'\'')]
readn :: Int -> [Char] -> Int
readn :: Int -> String -> Int
readn Int
base String
n = (Int -> Char -> Int) -> Int -> String -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Int
r Char
x -> Char -> Int
value Char
x Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
base Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
r) Int
0 String
n
getNumber :: [Char] -> (EnumValToken,[Char],Int,[Char])
getNumber :: String -> (EnumValToken, String, Int, String)
getNumber cs :: String
cs@(Char
c:String
s)
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'0' = (EnumValToken, String, Int, String)
num10
| String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
s = (EnumValToken, String, Int, String)
const0
| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'x' Bool -> Bool -> Bool
|| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'X' = (EnumValToken, String, Int, String)
num16
| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'o' Bool -> Bool -> Bool
|| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'O' = (EnumValToken, String, Int, String)
num8
| Bool
otherwise = (EnumValToken, String, Int, String)
num10
where (Char
hs:String
ts) = String
s
const0 :: (EnumValToken, String, Int, String)
const0 = (EnumValToken
TkInteger10, String
"0",Int
1,String
s)
num10 :: (EnumValToken, String, Int, String)
num10 = let (String
n,String
r) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
cs
in (EnumValToken
TkInteger10,String
n,String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
n,String
r)
num16 :: (EnumValToken, String, Int, String)
num16 = (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Int, String)
readNum Char -> Bool
isHexaDigit String
ts EnumValToken
TkInteger16
num8 :: (EnumValToken, String, Int, String)
num8 = (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Int, String)
readNum Char -> Bool
isOctalDigit String
ts EnumValToken
TkInteger8
readNum :: (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Int, String)
readNum Char -> Bool
p String
ts EnumValToken
tk
= let nrs :: (String, String)
nrs@(String
n,String
rs) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
p String
ts
in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
n then (EnumValToken, String, Int, String)
const0
else (EnumValToken
tk , String
n, Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
+String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
n,String
rs)
isHexaDigit :: Char -> Bool
isHexaDigit :: Char -> Bool
isHexaDigit Char
d = Char -> Bool
isDigit Char
d Bool -> Bool -> Bool
|| (Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'F') Bool -> Bool -> Bool
|| (Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'f')
isOctalDigit :: Char -> Bool
isOctalDigit :: Char -> Bool
isOctalDigit Char
d = Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'7'
value :: Char -> Int
value :: Char -> Int
value Char
c | Char -> Bool
isDigit Char
c = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0'
| Char -> Bool
isUpper Char
c = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
10
| Char -> Bool
isLower Char
c = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
10