Skip to content

Commit

Permalink
Update to return full source ranges, 0-based line and column numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
benl23x5 committed Jan 2, 2019
1 parent 85684b7 commit d44ff6a
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 137 deletions.
23 changes: 12 additions & 11 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

Inchworm is a simple parser combinator framework specialized to
lexical analysis.
Tokens can be specified via simple fold functions,
and we include baked in source location handling.
Tokens are specified via simple fold functions, and we include
baked in source location handling.

If you want to parse expressions instead of performing lexical
analysis then try the `parsec` or `attoparsec` packages, which
Expand Down Expand Up @@ -32,18 +32,18 @@ error.

```
import Text.Lexer.Inchworm.Char
import qualified Data.Char as Char
import qualified Data.Char as Char
-- | A source token.
data Token
= KBra | KKet | KVar String | KCon String | KInt Integer
deriving Show
-- | A thing with attached location information.
data Located a
= Located FilePath Location a
= Located FilePath (Range Location) a
deriving Show
-- | Scanner for a lispy language.
scanner :: FilePath
-> Scanner IO Location [Char] (Located Token)
Expand All @@ -58,11 +58,12 @@ scanner fileName
, fmap (stamp KCon)
$ munchWord (\ix c -> if ix == 0 then Char.isUpper c
else Char.isAlpha c)
]
]
where -- Stamp a token with source location information.
stamp k (l, t)
= Located fileName l (k t)
stamp k (range, t)
= Located fileName range (k t)
main :: IO ()
main
= do let fileName = "Source.lispy"
let source = "(some (Lispy like) 26 Program 93 (for you))"
Expand Down
13 changes: 13 additions & 0 deletions Text/Lexer/Inchworm.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
-- are in the "Text.Lexer.Inchworm.Char" module.
--
-- No dependencies other than the Haskell 'base' library.
--
-- __ Release Notes __
--
-- @
-- For 1.1.1.1:
-- * Matching combinators now produce the first and final locations
-- that matched.
-- * Line and column offsets are now 0-based instead of 1-based,
-- for easier inteface with client editors that expect this (eg VSCode).
-- Thanks to Amos Robinson:
-- * Haskell string parser now correctly handles strings gaps and the
-- string escape character \\&
-- @
--
-- __ Minimal example __
--
Expand Down
72 changes: 37 additions & 35 deletions Text/Lexer/Inchworm/Char.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Text.Lexer.Inchworm.Char
, scanStringIO

-- * Locations
, Location (..)
, Range (..), Location (..)
, bumpLocationWithChar

-- * Scanners
Expand All @@ -33,7 +33,7 @@ scanStringIO

scanStringIO str scanner
= scanListIO
(Location 1 1)
(Location 0 0)
bumpLocationWithChar
str scanner

Expand All @@ -45,35 +45,35 @@ scanStringIO str scanner
bumpLocationWithChar :: Char -> Location -> Location
bumpLocationWithChar c (Location line col)
= case c of
'\n' -> Location (line + 1) 1
'\n' -> Location (line + 1) 0
_ -> Location line (col + 1)


-- Integers -------------------------------------------------------------------
-- | Scan a decimal integer, with optional @-@ and @+@ sign specifiers.
scanInteger
:: Monad m
=> Scanner m loc [Char] (loc, Integer)
=> Scanner m loc [Char] (Range loc, Integer)

scanInteger
= munchPred Nothing matchInt acceptInt
where
matchInt 0 !c
= c == '-' || c == '+' || Char.isDigit c

matchInt _ !c = Char.isDigit c
matchInt _ !c = Char.isDigit c

acceptInt ('+' : cs)
| null cs = Nothing
| null cs = Nothing

acceptInt ('-' : cs)
| null cs = Nothing
| null cs = Nothing

acceptInt cs = Just $ read cs
acceptInt cs = Just $ read cs

{-# SPECIALIZE INLINE
scanInteger
:: Scanner IO Location [Char] (Location, Integer)
:: Scanner IO Location [Char] (Range Location, Integer)
#-}

-- Strings --------------------------------------------------------------------
Expand All @@ -83,7 +83,7 @@ scanInteger
--
scanHaskellString
:: Monad m
=> Scanner m loc [Char] (loc, String)
=> Scanner m loc [Char] (Range loc, String)

scanHaskellString
= munchFold Nothing matchC (False, False) acceptC
Expand Down Expand Up @@ -112,7 +112,7 @@ scanHaskellString

{-# SPECIALIZE INLINE
scanHaskellString
:: Scanner IO Location [Char] (Location, String)
:: Scanner IO Location [Char] (Range Location, String)
#-}


Expand All @@ -123,7 +123,7 @@ scanHaskellString
--
scanHaskellChar
:: Monad m
=> Scanner m loc [Char] (loc, Char)
=> Scanner m loc [Char] (Range loc, Char)

scanHaskellChar
= munchFold Nothing matchC (False, False) acceptC
Expand All @@ -141,23 +141,24 @@ scanHaskellChar

acceptC ('\'' : cs)
= case readChar cs of
-- Character literals do not support gaps or escape terminators
-- Character literals do not support gaps or
-- escape terminators
Just (Just c, "\'") -> Just c
_ -> Nothing

acceptC _ = Nothing

{-# SPECIALIZE INLINE
scanHaskellChar
:: Scanner IO Location [Char] (Location, Char)
:: Scanner IO Location [Char] (Range Location, Char)
#-}


-- Comments -------------------------------------------------------------------
-- | Scan a Haskell block comment.
scanHaskellCommentBlock
:: Monad m
=> Scanner m loc [Char] (loc, String)
=> Scanner m loc [Char] (Range loc, String)

scanHaskellCommentBlock
= munchFold Nothing matchC (' ', True) acceptC
Expand All @@ -177,14 +178,14 @@ scanHaskellCommentBlock

{-# SPECIALIZE INLINE
scanHaskellCommentBlock
:: Scanner IO Location [Char] (Location, String)
:: Scanner IO Location [Char] (Range Location, String)
#-}


-- | Scan a Haskell line comment.
scanHaskellCommentLine
:: Monad m
=> Scanner m loc [Char] (loc, String)
=> Scanner m loc [Char] (Range loc, String)

scanHaskellCommentLine
= munchPred Nothing matchC acceptC
Expand All @@ -201,7 +202,7 @@ scanHaskellCommentLine

{-# SPECIALIZE INLINE
scanHaskellCommentLine
:: Scanner IO Location [Char] (Location, String)
:: Scanner IO Location [Char] (Range Location, String)
#-}


Expand All @@ -223,12 +224,13 @@ decodeString ss0
Just (Nothing, cs') -> go acc cs'
Nothing -> go (c : acc) cs

-- | Result of reading a character: either a real char, or an empty string that is a
-- successful read, but contains no characters.
-- These empty strings are sometimes required to remove ambiguity: for example,
-- '\SO' and '\SOH' are both valid escapes.
-- To distinguish between the strings ['\SO', 'H'] and ['\SOH'], it is necessary
-- to explicitly terminate the escape for the former: '\SO\&H' means ['\SO', 'H'].
-- | Result of reading a character: either a real char, or an empty string
-- that is a successful read, but contains no characters.
-- These empty strings are sometimes required to remove ambiguity:
-- for example,'\SO' and '\SOH' are both valid escapes.
-- To distinguish between the strings ['\SO', 'H'] and ['\SOH'],
-- it is necessary to explicitly terminate the escape for the former:
-- '\SO\&H' means ['\SO', 'H'].
type CharGap = Maybe Char

-- | Read a character literal, handling escape codes.
Expand All @@ -246,13 +248,13 @@ readChar ('\\' : 'o' : cs)

-- Control characters defined by carret characters, like \^G
readChar ('\\' : '^' : c : rest)
| c >= 'A' && c <= 'Z' = Just (Just $ Char.chr (Char.ord c - 1), rest)
| c == '@' = Just (Just $ Char.chr 0, rest)
| c == '[' = Just (Just $ Char.chr 27, rest)
| c == '\\' = Just (Just $ Char.chr 28, rest)
| c == ']' = Just (Just $ Char.chr 29, rest)
| c == '^' = Just (Just $ Char.chr 30, rest)
| c == '_' = Just (Just $ Char.chr 31, rest)
| c >= 'A' && c <= 'Z' = Just (Just $ Char.chr (Char.ord c - 1), rest)
| c == '@' = Just (Just $ Char.chr 0, rest)
| c == '[' = Just (Just $ Char.chr 27, rest)
| c == '\\' = Just (Just $ Char.chr 28, rest)
| c == ']' = Just (Just $ Char.chr 29, rest)
| c == '^' = Just (Just $ Char.chr 30, rest)
| c == '_' = Just (Just $ Char.chr 31, rest)

-- Control characters defined by decimal escape codes.
readChar ('\\' : cs)
Expand All @@ -276,16 +278,16 @@ readChar ('\\' : cs)
= let go [] = Nothing
go ((str, c) : moar)
= case List.stripPrefix str cs of
Nothing -> go moar
Just rest -> Just (Just c, rest)
Nothing -> go moar
Just rest -> Just (Just c, rest)

in go escapedChars

-- Just a regular character.
readChar (c : rest) = Just (Just c, rest)
readChar (c : rest) = Just (Just c, rest)

-- Nothing to read.
readChar _ = Nothing
readChar _ = Nothing

escapedChars :: [(String, Char)]
escapedChars
Expand Down
Loading

0 comments on commit d44ff6a

Please sign in to comment.