Skip to content

Commit 0d862bf

Browse files
pirapiraclaude
andcommitted
Complete Phase 7b: stdlib data handling (struct, io, bisect, base64, json, re)
Add data handling stdlib modules needed for SSZ serialization and fixtures: - struct: pack/unpack/calcsize with format parsing and big/little-endian support - io: BytesIO/StringIO as heap-backed instances with read/write/seek/context manager - bisect: bisect_left/bisect_right/insort with binary search - base64: b64encode/decode, urlsafe variants, b16encode/decode - json: dumps serializer and loads recursive-descent parser with JsonVal AST - re: stub module (not needed for core leanSpec spec) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 029cffb commit 0d862bf

11 files changed

Lines changed: 1245 additions & 1 deletion

File tree

CLAUDE.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ LeanPython/
4848
Stdlib.lean — stdlib umbrella import
4949
Stdlib/
5050
Math.lean — Pure math helpers (ceil, floor, sqrt, log, etc.)
51+
Struct.lean — struct.pack/unpack/calcsize (binary data packing)
52+
IO.lean — io.BytesIO/StringIO (in-memory stream objects)
53+
Bisect.lean — bisect_left/bisect_right/insort (binary search)
54+
Base64.lean — b64encode/decode, b16encode/decode, urlsafe variants
55+
Json.lean — json.dumps (serializer) and json.loads (parser)
5156
Main.lean — CLI entry point (reads .py file, parses, interprets)
5257
LeanPythonTest.lean — test driver root
5358
LeanPythonTest/

LeanPython/Interpreter/Eval.lean

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ open LeanPython.Runtime.Ops
1313
open LeanPython.Runtime.Builtins
1414
open LeanPython.Interpreter
1515
open LeanPython.Parser (parse)
16+
open LeanPython.Stdlib.IO
1617
open LeanPython.Lexer (SourceSpan SourcePos)
1718

1819
-- ============================================================
@@ -724,6 +725,40 @@ partial def callValueDispatch (callee : Value) (args : List Value)
724725
match args with
725726
| [obj] => getAttributeValue obj attrName
726727
| _ => throwTypeError "attrgetter object takes exactly 1 argument"
728+
else if name == "io.BytesIO" then do
729+
-- Construct a BytesIO instance with _buffer and _pos attrs
730+
let initBuf := match args with
731+
| [.bytes b] => b
732+
| [] => ByteArray.empty
733+
| _ => ByteArray.empty
734+
let mut attrs : Std.HashMap String Value := {}
735+
attrs := attrs.insert "_buffer" (.bytes initBuf)
736+
attrs := attrs.insert "_pos" (.int 0)
737+
-- Create a class object for BytesIO (lightweight)
738+
let cls ← allocClassObj {
739+
name := "BytesIO", bases := #[], mro := #[], ns := {}, slots := none }
740+
match cls with
741+
| .classObj cref => heapSetClassData cref {
742+
name := "BytesIO", bases := #[], mro := #[cls], ns := {}, slots := none }
743+
| _ => pure ()
744+
let instRef ← heapAlloc (.instanceObjData { cls := cls, attrs := attrs })
745+
return .instance instRef
746+
else if name == "io.StringIO" then do
747+
let initBuf := match args with
748+
| [.str s] => s
749+
| [] => ""
750+
| _ => ""
751+
let mut attrs : Std.HashMap String Value := {}
752+
attrs := attrs.insert "_buffer" (.str initBuf)
753+
attrs := attrs.insert "_pos" (.int 0)
754+
let cls ← allocClassObj {
755+
name := "StringIO", bases := #[], mro := #[], ns := {}, slots := none }
756+
match cls with
757+
| .classObj cref => heapSetClassData cref {
758+
name := "StringIO", bases := #[], mro := #[cls], ns := {}, slots := none }
759+
| _ => pure ()
760+
let instRef ← heapAlloc (.instanceObjData { cls := cls, attrs := attrs })
761+
return .instance instRef
727762
else
728763
callBuiltin name args kwargs
729764
| .function ref => do
@@ -1052,6 +1087,17 @@ partial def getAttributeValue (obj : Value) (attr : String) : InterpM Value := d
10521087
match id_.cls with
10531088
| .classObj cref => do
10541089
let cd ← heapGetClassData cref
1090+
-- Builtin instance types: return bound methods for known method names
1091+
if cd.name == "BytesIO" && ["write", "read", "getvalue", "seek", "tell",
1092+
"__enter__", "__exit__", "close"].contains attr then
1093+
match obj with
1094+
| .instance _ => return .boundMethod obj attr
1095+
| _ => pure ()
1096+
if cd.name == "StringIO" && ["write", "read", "getvalue", "seek", "tell",
1097+
"__enter__", "__exit__", "close"].contains attr then
1098+
match obj with
1099+
| .instance _ => return .boundMethod obj attr
1100+
| _ => pure ()
10551101
-- First pass: check for data descriptors (property with getter)
10561102
for mroEntry in cd.mro do
10571103
match mroEntry with
@@ -1470,6 +1516,46 @@ partial def getBuiltinModule (name : String) : InterpM (Option Value) := do
14701516
"Callable", "Hashable", "Sized", "Container"] do
14711517
ns := ns.insert n .none
14721518
some <$> mkMod ns
1519+
| "struct" =>
1520+
let mut ns : Std.HashMap String Value := {}
1521+
ns := ns.insert "pack" (.builtin "struct.pack")
1522+
ns := ns.insert "unpack" (.builtin "struct.unpack")
1523+
ns := ns.insert "calcsize" (.builtin "struct.calcsize")
1524+
some <$> mkMod ns
1525+
| "io" =>
1526+
let mut ns : Std.HashMap String Value := {}
1527+
ns := ns.insert "BytesIO" (.builtin "io.BytesIO")
1528+
ns := ns.insert "StringIO" (.builtin "io.StringIO")
1529+
some <$> mkMod ns
1530+
| "bisect" =>
1531+
let mut ns : Std.HashMap String Value := {}
1532+
ns := ns.insert "bisect_left" (.builtin "bisect.bisect_left")
1533+
ns := ns.insert "bisect_right" (.builtin "bisect.bisect_right")
1534+
ns := ns.insert "bisect" (.builtin "bisect.bisect")
1535+
ns := ns.insert "insort" (.builtin "bisect.insort")
1536+
ns := ns.insert "insort_right" (.builtin "bisect.insort_right")
1537+
ns := ns.insert "insort_left" (.builtin "bisect.insort_left")
1538+
some <$> mkMod ns
1539+
| "base64" =>
1540+
let mut ns : Std.HashMap String Value := {}
1541+
ns := ns.insert "b64encode" (.builtin "base64.b64encode")
1542+
ns := ns.insert "b64decode" (.builtin "base64.b64decode")
1543+
ns := ns.insert "urlsafe_b64encode" (.builtin "base64.urlsafe_b64encode")
1544+
ns := ns.insert "urlsafe_b64decode" (.builtin "base64.urlsafe_b64decode")
1545+
ns := ns.insert "b16encode" (.builtin "base64.b16encode")
1546+
ns := ns.insert "b16decode" (.builtin "base64.b16decode")
1547+
some <$> mkMod ns
1548+
| "json" =>
1549+
let mut ns : Std.HashMap String Value := {}
1550+
ns := ns.insert "dumps" (.builtin "json.dumps")
1551+
ns := ns.insert "loads" (.builtin "json.loads")
1552+
some <$> mkMod ns
1553+
| "re" =>
1554+
let mut ns : Std.HashMap String Value := {}
1555+
for n in ["compile", "match", "search", "sub", "findall", "split",
1556+
"IGNORECASE", "MULTILINE", "DOTALL", "VERBOSE"] do
1557+
ns := ns.insert n (.builtin s!"re.{n}")
1558+
some <$> mkMod ns
14731559
| _ => return none
14741560

14751561
/-- Load a module by fully-qualified name and file path.
@@ -2190,8 +2276,12 @@ partial def callBoundMethod (receiver : Value) (method : String) (args : List Va
21902276
| .instance iref => do
21912277
-- Look up method in instance's class MRO and call with self
21922278
let id_ ← heapGetInstanceData iref
2279+
-- Check for builtin instance types (BytesIO, StringIO)
21932280
match id_.cls with
21942281
| .classObj cref => do
2282+
let cd_ ← heapGetClassData cref
2283+
if cd_.name == "BytesIO" then return ← callBytesIOMethod iref method args
2284+
if cd_.name == "StringIO" then return ← callStringIOMethod iref method args
21952285
let cd ← heapGetClassData cref
21962286
let mut found : Option (Value × Value) := none -- (function, defining class)
21972287
for mroEntry in cd.mro do

LeanPython/Runtime/Builtins.lean

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import LeanPython.Runtime.Ops
22
import LeanPython.Stdlib.Math
3+
import LeanPython.Stdlib.Struct
4+
import LeanPython.Stdlib.IO
5+
import LeanPython.Stdlib.Bisect
6+
import LeanPython.Stdlib.Base64
7+
import LeanPython.Stdlib.Json
38

49
set_option autoImplicit false
510

@@ -9,6 +14,10 @@ open LeanPython.Runtime
914
open LeanPython.Runtime.Ops
1015
open LeanPython.Interpreter
1116
open LeanPython.Stdlib.Math
17+
open LeanPython.Stdlib.Struct
18+
open LeanPython.Stdlib.Bisect
19+
open LeanPython.Stdlib.Base64
20+
open LeanPython.Stdlib.Json
1221

1322
-- ============================================================
1423
-- Individual builtin implementations
@@ -790,6 +799,35 @@ partial def callBuiltin (name : String) (args : List Value)
790799
match args with
791800
| [_] => return .tuple #[]
792801
| _ => throwTypeError "dataclasses.fields() takes exactly 1 argument"
802+
-- ============================================================
803+
-- struct module functions
804+
-- ============================================================
805+
| "struct.pack" => structPack args
806+
| "struct.unpack" => structUnpack args
807+
| "struct.calcsize" => structCalcsize args
808+
-- ============================================================
809+
-- bisect module functions
810+
-- ============================================================
811+
| "bisect.bisect_left" => bisectLeft args kwargs
812+
| "bisect.bisect_right" => bisectRight args kwargs
813+
| "bisect.bisect" => bisectRight args kwargs -- bisect is alias for bisect_right
814+
| "bisect.insort" => bisectInsort args kwargs
815+
| "bisect.insort_right" => bisectInsort args kwargs
816+
| "bisect.insort_left" => bisectInsort args kwargs -- simplified
817+
-- ============================================================
818+
-- base64 module functions
819+
-- ============================================================
820+
| "base64.b64encode" => b64encode args
821+
| "base64.b64decode" => b64decode args
822+
| "base64.urlsafe_b64encode" => urlsafeB64encode args
823+
| "base64.urlsafe_b64decode" => urlsafeB64decode args
824+
| "base64.b16encode" => b16encode args
825+
| "base64.b16decode" => b16decode args
826+
-- ============================================================
827+
-- json module functions
828+
-- ============================================================
829+
| "json.dumps" => jsonDumps args kwargs
830+
| "json.loads" => jsonLoads args
793831
| _ => throwNotImplemented s!"builtin '{name}' is not implemented"
794832

795833
end LeanPython.Runtime.Builtins

LeanPython/Stdlib.lean

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
import LeanPython.Stdlib.Math
2+
import LeanPython.Stdlib.Struct
3+
import LeanPython.Stdlib.IO
4+
import LeanPython.Stdlib.Bisect
5+
import LeanPython.Stdlib.Base64
6+
import LeanPython.Stdlib.Json

LeanPython/Stdlib/Base64.lean

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import LeanPython.Interpreter.Types
2+
3+
set_option autoImplicit false
4+
5+
namespace LeanPython.Stdlib.Base64
6+
7+
open LeanPython.Runtime
8+
open LeanPython.Interpreter
9+
10+
-- ============================================================
11+
-- Base64 encoding/decoding
12+
-- ============================================================
13+
14+
private def stdAlphabet : String := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
15+
private def urlAlphabet : String := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
16+
17+
private def charToIndex (c : Char) (urlSafe : Bool) : Option Nat :=
18+
let alpha := if urlSafe then urlAlphabet else stdAlphabet
19+
alpha.toList.findIdx? (· == c) |>.map id
20+
21+
private partial def encodeBase64 (data : ByteArray) (urlSafe : Bool) : ByteArray := Id.run do
22+
let alpha := if urlSafe then urlAlphabet else stdAlphabet
23+
let alphaArr := alpha.toList.toArray
24+
let mut result := ByteArray.empty
25+
let mut i : Nat := 0
26+
while i + 2 < data.size do
27+
let b0 := (data.get! i).toNat
28+
let b1 := (data.get! (i + 1)).toNat
29+
let b2 := (data.get! (i + 2)).toNat
30+
let n := b0 * 65536 + b1 * 256 + b2
31+
result := result.push (alphaArr[n >>> 18 &&& 63]!).toUInt8
32+
result := result.push (alphaArr[n >>> 12 &&& 63]!).toUInt8
33+
result := result.push (alphaArr[n >>> 6 &&& 63]!).toUInt8
34+
result := result.push (alphaArr[n &&& 63]!).toUInt8
35+
i := i + 3
36+
let remaining := data.size - i
37+
if remaining == 2 then
38+
let b0 := (data.get! i).toNat
39+
let b1 := (data.get! (i + 1)).toNat
40+
let n := b0 * 256 + b1
41+
result := result.push (alphaArr[n >>> 10 &&& 63]!).toUInt8
42+
result := result.push (alphaArr[n >>> 4 &&& 63]!).toUInt8
43+
result := result.push (alphaArr[(n <<< 2) &&& 63]!).toUInt8
44+
result := result.push '='.toUInt8
45+
else if remaining == 1 then
46+
let b0 := (data.get! i).toNat
47+
result := result.push (alphaArr[b0 >>> 2]!).toUInt8
48+
result := result.push (alphaArr[(b0 <<< 4) &&& 63]!).toUInt8
49+
result := result.push '='.toUInt8
50+
result := result.push '='.toUInt8
51+
return result
52+
53+
private partial def decodeBase64 (data : ByteArray) (urlSafe : Bool) : Except String ByteArray := do
54+
-- Strip padding and whitespace
55+
let chars := data.toList.filter (fun b => b.toNat != '='.toNat && b.toNat != '\n'.toNat && b.toNat != '\r'.toNat)
56+
let mut result := ByteArray.empty
57+
let mut buf : Nat := 0
58+
let mut bits : Nat := 0
59+
for b in chars do
60+
let c := Char.ofNat b.toNat
61+
match charToIndex c urlSafe with
62+
| some idx =>
63+
buf := (buf <<< 6) ||| idx
64+
bits := bits + 6
65+
if bits >= 8 then
66+
bits := bits - 8
67+
result := result.push ((buf >>> bits) &&& 255).toUInt8
68+
buf := buf &&& ((1 <<< bits) - 1)
69+
| none => .error s!"Invalid character in base64: '{c}'"
70+
return result
71+
72+
-- ============================================================
73+
-- Base16 (hex) encoding/decoding
74+
-- ============================================================
75+
76+
private def hexCharsUpper : String := "0123456789ABCDEF"
77+
78+
private partial def encodeBase16 (data : ByteArray) : ByteArray := Id.run do
79+
let hexArr := hexCharsUpper.toList.toArray
80+
let mut result := ByteArray.empty
81+
for i in [:data.size] do
82+
let b := (data.get! i).toNat
83+
result := result.push (hexArr[b >>> 4]!).toUInt8
84+
result := result.push (hexArr[b &&& 15]!).toUInt8
85+
return result
86+
87+
private def hexCharToNibble (c : Char) : Option Nat :=
88+
if '0' ≤ c && c ≤ '9' then some (c.toNat - '0'.toNat)
89+
else if 'A' ≤ c && c ≤ 'F' then some (c.toNat - 'A'.toNat + 10)
90+
else if 'a' ≤ c && c ≤ 'f' then some (c.toNat - 'a'.toNat + 10)
91+
else none
92+
93+
private partial def decodeBase16 (data : ByteArray) : Except String ByteArray := do
94+
if data.size % 2 != 0 then .error "Odd-length string"
95+
let mut result := ByteArray.empty
96+
let mut i : Nat := 0
97+
while i + 1 < data.size do
98+
let c1 := Char.ofNat (data.get! i).toNat
99+
let c2 := Char.ofNat (data.get! (i + 1)).toNat
100+
match hexCharToNibble c1, hexCharToNibble c2 with
101+
| some h, some l => result := result.push (h * 16 + l).toUInt8
102+
| _, _ => .error s!"Non-hexadecimal digit found"
103+
i := i + 2
104+
return result
105+
106+
-- ============================================================
107+
-- Python-facing functions
108+
-- ============================================================
109+
110+
/-- Coerce args to ByteArray for base64 functions. -/
111+
private def toByteArray (v : Value) : InterpM ByteArray := do
112+
match v with
113+
| .bytes b => return b
114+
| .str s => return s.toUTF8
115+
| _ => throwTypeError "a bytes-like object is required"
116+
117+
/-- Python base64.b64encode(s) -/
118+
partial def b64encode (args : List Value) : InterpM Value := do
119+
match args with
120+
| [v] => do
121+
let data ← toByteArray v
122+
return .bytes (encodeBase64 data false)
123+
| _ => throwTypeError "b64encode() takes exactly 1 argument"
124+
125+
/-- Python base64.b64decode(s) -/
126+
partial def b64decode (args : List Value) : InterpM Value := do
127+
match args with
128+
| [v] => do
129+
let data ← toByteArray v
130+
match decodeBase64 data false with
131+
| .ok result => return .bytes result
132+
| .error e => throwValueError e
133+
| _ => throwTypeError "b64decode() takes exactly 1 argument"
134+
135+
/-- Python base64.urlsafe_b64encode(s) -/
136+
partial def urlsafeB64encode (args : List Value) : InterpM Value := do
137+
match args with
138+
| [v] => do
139+
let data ← toByteArray v
140+
return .bytes (encodeBase64 data true)
141+
| _ => throwTypeError "urlsafe_b64encode() takes exactly 1 argument"
142+
143+
/-- Python base64.urlsafe_b64decode(s) -/
144+
partial def urlsafeB64decode (args : List Value) : InterpM Value := do
145+
match args with
146+
| [v] => do
147+
let data ← toByteArray v
148+
match decodeBase64 data true with
149+
| .ok result => return .bytes result
150+
| .error e => throwValueError e
151+
| _ => throwTypeError "urlsafe_b64decode() takes exactly 1 argument"
152+
153+
/-- Python base64.b16encode(s) -/
154+
partial def b16encode (args : List Value) : InterpM Value := do
155+
match args with
156+
| [v] => do
157+
let data ← toByteArray v
158+
return .bytes (encodeBase16 data)
159+
| _ => throwTypeError "b16encode() takes exactly 1 argument"
160+
161+
/-- Python base64.b16decode(s) -/
162+
partial def b16decode (args : List Value) : InterpM Value := do
163+
match args with
164+
| [v] => do
165+
let data ← toByteArray v
166+
match decodeBase16 data with
167+
| .ok result => return .bytes result
168+
| .error e => throwValueError e
169+
| _ => throwTypeError "b16decode() takes exactly 1 argument"
170+
171+
end LeanPython.Stdlib.Base64

0 commit comments

Comments
 (0)