-- -*- haskell -*-
{
module Data.Aeson.Micro.Scanner (Lexeme(..), scanLexemes) where

import qualified Data.ByteString as B
import Data.Word
}

-- The scanner works on raw bytes: alexGetByte (below) feeds it Word8 values
-- taken straight from a ByteString.
-- NOTE(review): latin1 is presumably chosen so that every element of a
-- character set denotes exactly one byte -- confirm against the Alex guide.
%encoding "latin1"

-- The token grammar follows RFC 7159: section 2 (whitespace), section 6
-- (numbers) and section 7 (strings).

-- ws: space, horizontal tab, line feed, carriage return
$ws = [\x20\x09\x0a\x0d]

-- RFC 7159: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
-- Bytes that must not appear raw inside a string: the control characters
-- 0x00-0x1f, the double quote (0x22) and the backslash (0x5c).
$escaped   = [\x00-\x1f\x22\x5c]
-- Every other byte value. This includes 0x80-0xff, so the individual bytes
-- of multi-byte UTF-8 sequences are accepted here without being validated.
$unescaped = [\x00-\xff] # $escaped

-- RFC 7159: int = zero / ( digit1-9 *DIGIT )
-- A leading zero is only allowed when it is the whole integer part.
@int = "0"|[1-9][0-9]*

-- RFC 7159: frac = decimal-point 1*DIGIT
@frac = "."[0-9]+

-- RFC 7159: exp = e [ minus / plus ] 1*DIGIT
@exp  = [eE][\-\+]?[0-9]+

-- RFC 7159: number = [ minus ] int [ frac ] [ exp ]
-- Only a minus sign may precede the integer part; a leading plus is not
-- accepted.
@num = "-"? @int @frac? @exp?

:-

-- Start code 0: everything outside of string literals.
-- The rules never switch start codes themselves; scanLexemes chooses the
-- start code for every call to alexScan based on the lexemes seen so far.

-- insignificant whitespace between tokens is skipped, no lexeme is emitted
<0> $ws     ;
-- structural characters
<0> "{"     { L_ObjStart }
<0> "}"     { L_ObjEnd }
<0> "["     { L_ArrStart }
<0> "]"     { L_ArrEnd }
-- opening quote of a string; scanLexemes scans with the string start code
-- from the next token on
<0> \"      { L_StrStart }
<0> \:      { L_Colon }
<0> \,      { L_Comma }
-- literal names (lower case only)
<0> "true"  { L_True }
<0> "false" { L_False }
<0> "null"  { L_Null }
<0> @num    { L_Number }

-- Start code string: the inside of a string literal.

-- closing quote; scanLexemes returns to start code 0 after it
<string> [\x22]                                                          { L_StrEnd }
-- run of bytes that need no escaping
<string> $unescaped+                                                     { L_StrUnescaped }
-- backslash followed by one of: double quote, backslash, slash, b, f, n, r, t
<string> \\[\x22\x5c\x2f\x62\x66\x6e\x72\x74]                            { L_StrEscaped }

-- Backslash-u escapes with four hex digits, split by the value of the
-- escaped 16-bit code unit so that surrogates can only appear as a pair:
--   first hex digit other than D (0000-CFFF and E000-FFFF)
<string> \\"u"[0-9a-cA-CefEF][0-9a-fA-F]{3}                              { L_StrEscapedHex }
--   D000-D7FF, the part of the D block below the surrogate range
<string> \\"u"[dD][0-7][0-9a-fA-F]{2}                                    { L_StrEscapedHex }
--   a high surrogate (D800-DBFF) immediately followed by a low surrogate
--   (DC00-DFFF); both escapes together form a single lexeme
<string> \\"u"[dD][89abAB][0-9a-fA-F]{2} \\"u"[dD][c-fC-F][0-9a-fA-F]{2} { L_StrEscapedHexSurr }
-- A surrogate escape that is not part of such a pair matches none of the
-- rules above.

{
-- | Token classes emitted by @scanLexemes@.
--
-- The constructors carry no payload; @scanLexemes@ pairs each one with the
-- input bytes it was produced from.
data Lexeme
     = L_ArrStart          -- ^ opening square bracket
     | L_ArrEnd            -- ^ closing square bracket
     | L_Colon             -- ^ colon
     | L_Comma             -- ^ comma
     | L_False             -- ^ the literal @false@
     | L_Null              -- ^ the literal @null@
     | L_Number            -- ^ a number matching the @num@ macro
     | L_ObjStart          -- ^ opening curly brace
     | L_ObjEnd            -- ^ closing curly brace
     | L_StrStart          -- ^ double quote seen outside a string (opens one)
     | L_StrEnd            -- ^ double quote seen inside a string (closes it)
     | L_StrEscaped        -- ^ backslash followed by a single escape character
     | L_StrEscapedHex     -- ^ one backslash-u escape with four hex digits, not a surrogate
     | L_StrEscapedHexSurr -- ^ two backslash-u escapes forming a high+low surrogate pair
     | L_StrUnescaped      -- ^ run of string bytes that need no escaping
     | L_True              -- ^ the literal @true@
     | L_LexError          -- ^ emitted by @scanLexemes@ when @alexScan@ reports an error
     deriving (Eq,Ord,Show)

-- | Scanner input: the bytes that have not been consumed yet.
type AlexInput = B.ByteString

-- | Byte source for the generated scanner: splits off the first byte of the
-- input, or yields @Nothing@ when the input is empty.
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte = B.uncons

-- Deliberately left undefined.
-- NOTE(review): presumably not needed because no rule uses a left context;
-- confirm against the Alex version in use.
-- alexInputPrevChar :: AlexInput -> Char

-- Type signature only; the definition is generated by @alex@.
-- The Int argument is the start code (scanLexemes passes 0 or string).
alexScan :: AlexInput -> Int -> AlexReturn Lexeme

-- | Split the input into a lazily produced list of lexemes, each paired
-- with the bytes it was produced from.
--
-- Scanning starts outside of a string (start code 0). After @L_StrStart@
-- the scanner is run with the @string@ start code until @L_StrEnd@ is seen.
-- Skipped input (whitespace) produces no list element.
--
-- When @alexScan@ reports an error, the list ends with a single
-- @L_LexError@ element carrying the input handed back with that error.
-- Reaching the end of the input simply ends the list, whichever start code
-- is active at that point.
scanLexemes :: B.ByteString -> [(Lexeme, B.ByteString)]
scanLexemes = scanFrom False
  where
    scanFrom insideString input =
      case alexScan input startCode of
        AlexEOF -> []
        AlexError rest -> [(L_LexError, rest)]
        AlexSkip rest _skipped -> scanFrom insideString rest
        AlexToken rest tokLen lexeme
          -- workaround for https://github.com/simonmar/alex/issues/119
          -- the reported length can be smaller than the number of bytes
          -- actually consumed; in that case trust the consumed byte count
          | lexeme == L_StrUnescaped && consumed > tokLen ->
              (L_StrUnescaped, B.take consumed input)
                : scanFrom insideString rest
          | otherwise ->
              (lexeme, B.take tokLen input)
                : scanFrom (modeAfter lexeme) rest
          where
            -- number of bytes between the old and the new input position
            consumed = B.length input - B.length rest
      where
        -- start code handed to the generated scanner for this step
        startCode
          | insideString = string
          | otherwise    = 0

        -- whether the next step has to scan in string mode
        modeAfter lexeme
          | insideString = lexeme /= L_StrEnd
          | otherwise    = lexeme == L_StrStart
}
