zksecurity
diff --git a/‎CLAUDE.md‎
Lines changed: 16 additions & 6 deletions b/‎CLAUDE.md‎
Lines changed: 16 additions & 6 deletions
diff --git a/‎Lython/Lexer.lean‎
Lines changed: 17 additions & 0 deletions b/‎Lython/Lexer.lean‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎Lython/Lexer/Char.lean‎
Lines changed: 29 additions & 0 deletions b/‎Lython/Lexer/Char.lean‎
Lines changed: 29 additions & 0 deletions
@@ -13,15 +13,25 @@ Python 3.12 interpreter in Lean4, targeting the leanSpec Ethereum consensus spec
 ```
 Lython.lean          — umbrella import for the library
 Lython/
-  Lexer.lean         — tokenizer
-  Parser.lean        — PEG parser
-  AST.lean           — Python AST node types
-  Interpreter.lean   — tree-walking interpreter
-  Runtime.lean       — runtime support (types, exceptions, stdlib)
+  Lexer.lean         — tokenizer entry point (imports sub-modules, exposes tokenize)
+  Lexer/
+    Types.lean       — SourcePos, SourceSpan, TokenKind, Token, LexError
+    Keywords.lean    — Python 3.12 keyword enum + lookup
+    Operators.lean   — Operator and Delimiter enums
+    State.lean       — LexerState, LexerM monad, char peek/advance helpers
+    Char.lean        — Character classification (isIdentStart, isHexDigit, etc.)
+    Number.lean      — Integer/float/imaginary literal lexing
+    StringLit.lean   — String/bytes/raw/f-string literal lexing
+    Indent.lean      — INDENT/DEDENT generation from leading whitespace
+    Core.lean        — Main tokenization loop, operator dispatch
+  Parser.lean        — PEG parser (stub)
+  AST.lean           — Python AST node types (stub)
+  Interpreter.lean   — tree-walking interpreter (stub)
+  Runtime.lean       — runtime support (stub)
 Main.lean            — CLI entry point
 LythonTest.lean      — test driver root
 LythonTest/
-  Basic.lean         — smoke tests
+  Basic.lean         — lexer tests (keywords, operators, numbers, strings, indent)
 ```
 
 ## Code Style
 
@@ -1,4 +1,21 @@
+import Lython.Lexer.Types
+import Lython.Lexer.Keywords
+import Lython.Lexer.Operators
+import Lython.Lexer.State
+import Lython.Lexer.Char
+import Lython.Lexer.Indent
+import Lython.Lexer.Number
+import Lython.Lexer.StringLit
+import Lython.Lexer.Core
+
 set_option autoImplicit false
 
 namespace Lython.Lexer
+
+/-- Tokenize a Python source string into an array of tokens.
+
+Runs `lexAll` starting from `LexerState.initial source`. On success only the
+produced tokens are returned and the final lexer state is discarded; on
+failure the `LexError` is propagated unchanged. -/
+def tokenize (source : String) : Except LexError (Array Token) :=
+  match lexAll.run (LexerState.initial source) with
+  | .ok (tokens, _) => .ok tokens  -- drop the final lexer state, keep the tokens
+  | .error err => .error err
+
 end Lython.Lexer
@@ -0,0 +1,29 @@
+set_option autoImplicit false
+
+namespace Lython.Lexer
+
+/-- Is the character a valid Python identifier start? (ASCII subset)
+
+True when `c.isAlpha` holds or `c` is an underscore. Digits are not accepted
+here; they are only allowed in continuation position (see `isIdentCont`). -/
+def isIdentStart (c : Char) : Bool :=
+  c.isAlpha || c == '_'
+
+/-- Is the character a valid Python identifier continuation? (ASCII subset)
+
+True when `c.isAlphanum` holds or `c` is an underscore, i.e. everything
+`isIdentStart` accepts plus digits. -/
+def isIdentCont (c : Char) : Bool :=
+  c.isAlphanum || c == '_'
+
+/-- Is the character an octal digit?
+
+Checks that the code point lies in the inclusive range 0x30–0x37,
+i.e. the characters '0' through '7'. -/
+def isOctDigit (c : Char) : Bool :=
+  c.val ≥ 0x30 && c.val ≤ 0x37
+
+/-- Is the character a binary digit?
+
+True only for the characters '0' and '1'. -/
+def isBinDigit (c : Char) : Bool :=
+  c == '0' || c == '1'
+
+/-- Is the character a hex digit?
+
+Accepts any character for which `c.isDigit` holds, plus code points
+0x61–0x66 ('a'..'f') and 0x41–0x46 ('A'..'F'), so both letter cases match. -/
+def isHexDigit (c : Char) : Bool :=
+  c.isDigit || (c.val ≥ 0x61 && c.val ≤ 0x66) || (c.val ≥ 0x41 && c.val ≤ 0x46)
+
+/-- Is the character horizontal whitespace (not newline)?
+
+True for space, tab, and form feed (`'\x0C'`). Line terminators such as
+`'\n'` and `'\r'` are not matched by this predicate. -/
+def isHorizWhitespace (c : Char) : Bool :=
+  c == ' ' || c == '\t' || c == '\x0C'
+
+end Lython.Lexer