Skip to content

Commit fea24b1

Browse files
pirapiraclaude
andcommitted
Begin Phase 9a: built-in type subclassing for leanSpec primitive types
Enable Python patterns like `class BaseUint(int, SSZType)` by introducing synthetic class objects for built-in types (int, bytes, object) that participate in MRO computation and provide dunder method dispatch. Key changes: - Add wrappedValue to InstanceData for int/bytes subclass instances - Create synthetic classObj for int/bytes/object with full dunder namespaces - Implement int.__new__ via super().__new__ for subclass construction - Add all int arithmetic/bitwise/comparison dunder methods as builtins - Fix super() to work in __new__ (cls fallback) and with classObj instances - Support custom exception class hierarchy (construction, raise, except) - Make typing.override a callable identity decorator - Improve tuple hashing and isinstance for built-in type subclasses Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2221ba2 commit fea24b1

8 files changed

Lines changed: 589 additions & 30 deletions

File tree

LeanPython/Interpreter/Eval.lean

Lines changed: 428 additions & 7 deletions
Large diffs are not rendered by default.

LeanPython/Interpreter/Types.lean

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ structure InterpreterState where
5252
currentFile : Option String
5353
currentPackage : Option String
5454
defaultFactories : Std.HashMap Nat Value := {}
55+
builtinTypeClasses : Std.HashMap String Value := {}
5556
deriving Inhabited
5657

5758
/-- Create a fresh interpreter state. -/

LeanPython/Runtime/Builtins.lean

Lines changed: 74 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ partial def builtinType (args : List Value) : InterpM Value := do
9898
| .instance ref => do
9999
let id_ ← heapGetInstanceData ref
100100
return id_.cls
101+
| .exception typeName _ => return .builtin typeName
101102
| _ => return .str s!"<class '{typeName v}'>"
102103
| _ => throwTypeError "type() takes 1 or 3 arguments"
103104

@@ -112,6 +113,11 @@ def builtinInt (args : List Value) : InterpM Value := do
112113
match s.toList.dropWhile Char.isWhitespace |>.reverse |>.dropWhile Char.isWhitespace |>.reverse |> String.ofList |>.toInt? with
113114
| some n => return .int n
114115
| none => throwValueError s!"invalid literal for int() with base 10: '{s}'"
116+
| [.instance iref] => do
117+
let id_ ← heapGetInstanceData iref
118+
match id_.wrappedValue with
119+
| some (.int n) => return .int n
120+
| _ => throwTypeError "int() argument must be a string or a number"
115121
| _ => throwTypeError "int() argument must be a string or a number"
116122

117123
/-- `float(x)` - convert to float. -/
@@ -320,6 +326,7 @@ partial def builtinIsinstance (args : List Value) : InterpM Value := do
320326
match name with
321327
| "object" => return .bool true -- everything is an instance of object
322328
| _ =>
329+
-- First check if it's a direct built-in value match
323330
let tn := typeName obj
324331
let isMatch := match name with
325332
| "int" => tn == "int" || tn == "bool"
@@ -332,7 +339,24 @@ partial def builtinIsinstance (args : List Value) : InterpM Value := do
332339
| "set" => tn == "set"
333340
| "bytes" => tn == "bytes"
334341
| _ => false
335-
return .bool isMatch
342+
if isMatch then return .bool true
343+
-- For instances, check if any class in MRO is the synthetic built-in type
344+
match obj with
345+
| .instance iref => do
346+
let id_ ← heapGetInstanceData iref
347+
match id_.cls with
348+
| .classObj cref => do
349+
let cd ← heapGetClassData cref
350+
let mut found := false
351+
for mroEntry in cd.mro do
352+
match mroEntry with
353+
| .classObj mref => do
354+
let mcd ← heapGetClassData mref
355+
if mcd.name == name then found := true
356+
| _ => pure ()
357+
return .bool found
358+
| _ => return .bool false
359+
| _ => return .bool false
336360
| [.instance instRef, .classObj _] =>
337361
return .bool (← isInstanceOfClass instRef args[1]!)
338362
| [.instance instRef, .tuple classes] => do
@@ -341,22 +365,52 @@ partial def builtinIsinstance (args : List Value) : InterpM Value := do
341365
match cls with
342366
| .classObj _ =>
343367
if ← isInstanceOfClass instRef cls then return .bool true
344-
| .builtin "object" => return .bool true
368+
| .builtin bname => do
369+
if bname == "object" then return .bool true
370+
-- Check MRO for synthetic built-in type
371+
let id_ ← heapGetInstanceData instRef
372+
match id_.cls with
373+
| .classObj cref => do
374+
let cd ← heapGetClassData cref
375+
for mroEntry in cd.mro do
376+
match mroEntry with
377+
| .classObj mref => do
378+
let mcd ← heapGetClassData mref
379+
if mcd.name == bname then return .bool true
380+
| _ => pure ()
381+
| _ => pure ()
345382
| _ => pure ()
346383
return .bool false
347384
| [_, .classObj _] => return .bool false -- non-instance is not an instance of a custom class
348385
| _ => throwTypeError "isinstance() takes 2 arguments"
349386

387+
/-- Hash a single value for tuple/object hashing. -/
388+
private partial def hashValue : Value → InterpM Int
389+
| .int n => pure n
390+
| .bool b => pure (if b then 1 else 0)
391+
| .str s => pure (hash s).toNat
392+
| .none => pure 0x345678
393+
| .float f => pure (hash (toString f)).toNat
394+
| .tuple items => do
395+
-- Combine element hashes using simple mixing
396+
let mut h : Int := 0x345678
397+
for item in items do
398+
let ih ← hashValue item
399+
h := h * 1000003 + ih
400+
pure h
401+
| .classObj ref => pure ref
402+
| .instance iref => do
403+
let id_ ← heapGetInstanceData iref
404+
match id_.wrappedValue with
405+
| some v => hashValue v
406+
| none => pure iref -- use heap ref as fallback
407+
| _ => pure 0
408+
350409
/-- `hash(obj)` - basic hash. -/
351-
def builtinHash (args : List Value) : InterpM Value := do
410+
partial def builtinHash (args : List Value) : InterpM Value := do
352411
match args with
353-
| [.int n] => return .int n
354-
| [.bool b] => return .int (if b then 1 else 0)
355-
| [.str s] => return .int (hash s).toNat
356-
| [.none] => return .int 0
357-
| [.float _f] => return .int 0 -- stub
358-
| [.tuple _] => return .int 0 -- stub
359-
| _ => throwTypeError "unhashable type"
412+
| [v] => return .int (← hashValue v)
413+
| _ => throwTypeError "hash() takes exactly 1 argument"
360414

361415
/-- `id(obj)` - identity (stub). -/
362416
def builtinId (args : List Value) : InterpM Value := do
@@ -528,10 +582,13 @@ partial def callBuiltin (name : String) (args : List Value)
528582
| "super" => do
529583
match args with
530584
| [] => do
531-
-- Implicit super(): look up __class__ and self from current scope
585+
-- Implicit super(): look up __class__ and first param from current scope
532586
let cls ← lookupVariable "__class__"
533-
let self ← lookupVariable "self"
534-
return .superObj cls self
587+
-- Try "self" first, then "cls" (for __new__/classmethods)
588+
let inst ← try lookupVariable "self"
589+
catch _ => try lookupVariable "cls"
590+
catch _ => throwTypeError "super(): __class__ cell not found"
591+
return .superObj cls inst
535592
| [cls, inst] => return .superObj cls inst
536593
| _ => throwTypeError "super() takes 0 or 2 arguments"
537594
| "hash" => builtinHash args
@@ -775,6 +832,10 @@ partial def callBuiltin (name : String) (args : List Value)
775832
match args with
776833
| [f] => return f
777834
| _ => throwTypeError "abstractmethod() takes exactly 1 argument"
835+
| "typing.override" => do
836+
match args with
837+
| [f] => return f
838+
| _ => throwTypeError "override() takes exactly 1 argument"
778839
-- ============================================================
779840
-- dataclasses module functions
780841
-- ============================================================

LeanPython/Runtime/Ops.lean

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,7 @@ partial def valueToStr (v : Value) : InterpM String :=
178178
return s!"<function {fd.name}>"
179179
| .builtin name => return s!"<built-in function {name}>"
180180
| .boundMethod _ method => return s!"<bound method {method}>"
181-
| .exception typeName msg =>
182-
if msg.isEmpty then return typeName
183-
else return s!"{typeName}({msg})"
181+
| .exception _typeName msg => return msg
184182
| .generator _ => return "<generator object>"
185183
| .classObj ref => do
186184
let cd ← heapGetClassData ref

LeanPython/Runtime/Types.lean

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,13 @@ structure ClassData where
8585
ns : Std.HashMap String Value
8686
slots : Option (Array String) := none
8787

88-
/-- Runtime instance data stored on the heap. -/
88+
/-- Runtime instance data stored on the heap.
89+
`wrappedValue` stores the underlying built-in value for subclasses of int/bytes
90+
(e.g., `class Uint64(int, SSZType)` produces instances with `wrappedValue := some (.int n)`). -/
8991
structure InstanceData where
9092
cls : Value
9193
attrs : Std.HashMap String Value
94+
wrappedValue : Option Value := none
9295

9396
/-- Runtime module data stored on the heap. -/
9497
structure ModuleData where
@@ -133,6 +136,7 @@ inductive RuntimeError where
133136
| runtimeError : String → RuntimeError
134137
| importError : String → RuntimeError
135138
| moduleNotFound : String → RuntimeError
139+
| customError : String → String → List String → RuntimeError -- typeName, message, parentTypeNames
136140
deriving Repr
137141

138142
instance : ToString RuntimeError where
@@ -151,6 +155,7 @@ instance : ToString RuntimeError where
151155
| .runtimeError s => s!"RuntimeError: {s}"
152156
| .importError s => s!"ImportError: {s}"
153157
| .moduleNotFound s => s!"ModuleNotFoundError: No module named '{s}'"
158+
| .customError tn msg _ => if msg.isEmpty then tn else s!"{tn}: {msg}"
154159

155160
-- ============================================================
156161
-- BEq Value (needed for dict lookup, == operator, in operator)
@@ -221,9 +226,10 @@ partial def Value.toStr : Value → String
221226
| .int n => toString n
222227
| .float f =>
223228
let s := toString f
224-
-- Lean's Float.toString may not match Python exactly, but good enough
225229
s
226230
| .str s => s
231+
-- str(exception) returns just the message (not the type name)
232+
| .exception _typeName msg => msg
227233
| .bytes _b => "b'...'"
228234
| .list _ => "[...]"
229235
| .tuple elems =>
@@ -237,7 +243,6 @@ partial def Value.toStr : Value → String
237243
| .builtin name => s!"<built-in function {name}>"
238244
| .ellipsis => "Ellipsis"
239245
| .boundMethod _ method => s!"<bound method {method}>"
240-
| .exception typeName msg => if msg.isEmpty then typeName else s!"{typeName}({msg})"
241246
| .generator _ => "<generator object>"
242247
| .classObj _ => "<class>"
243248
| .instance _ => "<instance>"
@@ -268,7 +273,7 @@ partial def Value.toRepr : Value → String
268273
| .builtin name => s!"<built-in function {name}>"
269274
| .ellipsis => "Ellipsis"
270275
| .boundMethod _ method => s!"<bound method {method}>"
271-
| .exception typeName msg => if msg.isEmpty then typeName else s!"{typeName}({msg})"
276+
| .exception typeName msg => if msg.isEmpty then typeName else s!"{typeName}('{msg}')"
272277
| .generator _ => "<generator object>"
273278
| .classObj _ => "<class>"
274279
| .instance _ => "<instance>"
@@ -301,6 +306,7 @@ def runtimeErrorTypeName : RuntimeError → String
301306
| .runtimeError _ => "RuntimeError"
302307
| .importError _ => "ImportError"
303308
| .moduleNotFound _ => "ModuleNotFoundError"
309+
| .customError tn _ _ => tn
304310

305311
/-- Get the message portion of a RuntimeError. -/
306312
def runtimeErrorMessage : RuntimeError → String
@@ -309,6 +315,7 @@ def runtimeErrorMessage : RuntimeError → String
309315
| .attributeError s | .overflowError s | .notImplemented s
310316
| .runtimeError s | .importError s | .moduleNotFound s => s
311317
| .stopIteration => ""
318+
| .customError _ msg _ => msg
312319

313320
/-- Check if an exception type matches a handler type, respecting the hierarchy.
314321
`Exception` catches all standard errors, `BaseException` catches everything. -/
@@ -321,6 +328,14 @@ def exceptionMatches (errorTypeName handlerTypeName : String) : Bool :=
321328
else
322329
errorTypeName == handlerTypeName
323330

331+
/-- Check if a custom exception matches a handler, considering parent type names. -/
332+
def customExceptionMatches (error : RuntimeError) (handlerTypeName : String) : Bool :=
333+
match error with
334+
| .customError tn _ parents =>
335+
if handlerTypeName == "BaseException" || handlerTypeName == "Exception" then true
336+
else tn == handlerTypeName || parents.any (· == handlerTypeName)
337+
| other => exceptionMatches (runtimeErrorTypeName other) handlerTypeName
338+
324339
-- ============================================================
325340
-- Builtin name table
326341
-- ============================================================
@@ -350,4 +365,18 @@ def builtinNames : List String :=
350365
def isBuiltinName (name : String) : Bool :=
351366
builtinNames.contains name
352367

368+
/-- Check if a name is a built-in exception class. -/
369+
def isBuiltinExceptionName (name : String) : Bool :=
370+
name == "Exception" || name == "BaseException" ||
371+
name == "ValueError" || name == "TypeError" ||
372+
name == "KeyError" || name == "IndexError" ||
373+
name == "RuntimeError" || name == "ZeroDivisionError" ||
374+
name == "AssertionError" || name == "AttributeError" ||
375+
name == "OverflowError" || name == "StopIteration" ||
376+
name == "NotImplementedError" || name == "NameError" ||
377+
name == "OSError" || name == "IOError" ||
378+
name == "FileNotFoundError" || name == "ImportError" ||
379+
name == "ModuleNotFoundError" || name == "SystemExit" ||
380+
name == "KeyboardInterrupt" || name == "GeneratorExit"
381+
353382
end LeanPython.Runtime

LeanPythonTest/Interpreter.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,8 @@ private def assertPyError (source errSubstr : String) : IO Unit := do
239239
-- Bare except catches everything
240240
#eval assertPy "try:\n raise ValueError(\"x\")\nexcept:\n print(\"caught\")\n" "caught\n"
241241

242-
-- Exception as binding gives exception value
243-
#eval assertPy "try:\n raise ValueError(\"bad value\")\nexcept ValueError as e:\n print(e)\n" "ValueError(bad value)\n"
242+
-- Exception as binding gives message string
243+
#eval assertPy "try:\n raise ValueError(\"bad value\")\nexcept ValueError as e:\n print(e)\n" "bad value\n"
244244

245245
-- Finally always runs
246246
#eval assertPy "try:\n raise ValueError(\"x\")\nexcept ValueError:\n print(\"caught\")\nfinally:\n print(\"done\")\n" "caught\ndone\n"

LeanPythonTest/Stdlib.lean

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,3 +735,52 @@ private def assertPyError (source errSubstr : String) : IO Unit := do
735735

736736
-- __get_pydantic_core_schema__ classmethod is callable and returns schema
737737
#eval assertPy "from pydantic_core import core_schema\nclass BoundedInt:\n @classmethod\n def __get_pydantic_core_schema__(cls, source_type, handler):\n return core_schema.int_schema(ge=0, lt=100)\nresult = BoundedInt.__get_pydantic_core_schema__(None, None)\nprint(result['type'])" "int\n"
738+
739+
-- ============================================================
740+
-- Phase 9a: leanSpec primitive type support
741+
-- ============================================================
742+
743+
-- Exception hierarchy
744+
#eval assertPy "class SSZError(Exception):\n pass\nclass SSZTypeError(SSZError):\n pass\ntry:\n raise SSZTypeError('bad type')\nexcept SSZError as e:\n print(type(e).__name__)\n print(str(e))" "SSZTypeError\nbad type\n"
745+
746+
-- ABC with classmethod + abstractmethod
747+
#eval assertPy "from abc import ABC, abstractmethod\nclass SSZType(ABC):\n @classmethod\n @abstractmethod\n def is_fixed_size(cls):\n pass\nclass Concrete(SSZType):\n @classmethod\n def is_fixed_size(cls):\n return True\nprint(Concrete.is_fixed_size())" "True\n"
748+
749+
-- @override decorator (identity, doesn't fail)
750+
#eval assertPy "from typing import override\ndef base():\n return 1\n@override\ndef child():\n return 2\nprint(child())" "2\n"
751+
752+
-- Multiple inheritance from int: basic construction
753+
#eval assertPy "from abc import ABC, abstractmethod\nclass SSZType(ABC):\n @classmethod\n @abstractmethod\n def is_fixed_size(cls):\n pass\nclass BaseUint(int, SSZType):\n __slots__ = ()\n BITS = 64\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n @classmethod\n def is_fixed_size(cls):\n return True\nclass Uint64(BaseUint):\n BITS = 64\nx = Uint64(42)\nprint(int(x))\nprint(type(x).__name__)" "42\nUint64\n"
754+
755+
-- isinstance checks for int subclass
756+
#eval assertPy "from abc import ABC, abstractmethod\nclass SSZType(ABC):\n @classmethod\n @abstractmethod\n def is_fixed_size(cls):\n pass\nclass BaseUint(int, SSZType):\n __slots__ = ()\n BITS = 64\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n @classmethod\n def is_fixed_size(cls):\n return True\nclass Uint64(BaseUint):\n BITS = 64\nx = Uint64(42)\nprint(isinstance(x, Uint64))\nprint(isinstance(x, BaseUint))\nprint(isinstance(x, int))\nprint(isinstance(x, SSZType))" "True\nTrue\nTrue\nTrue\n"
757+
758+
-- Arithmetic on int subclasses via dunder methods
759+
#eval assertPy "class BaseUint(int):\n BITS = 64\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n def __add__(self, other):\n return type(self)(super().__add__(other))\nclass Uint64(BaseUint):\n BITS = 64\nx = Uint64(10)\ny = Uint64(20)\nz = x + y\nprint(int(z))\nprint(type(z).__name__)" "30\nUint64\n"
760+
761+
-- Comparison operators on int subclasses
762+
#eval assertPy "class BaseUint(int):\n BITS = 64\n def __new__(cls, value):\n return super().__new__(cls, int(value))\nclass Uint64(BaseUint):\n BITS = 64\na = Uint64(10)\nb = Uint64(20)\nprint(a < b)\nprint(a == Uint64(10))\nprint(a > b)" "True\nTrue\nFalse\n"
763+
764+
-- Range validation in __new__
765+
#eval assertPyError "class SSZValueError(Exception):\n pass\nclass BaseUint(int):\n BITS = 8\n def __new__(cls, value):\n if not isinstance(value, int) or isinstance(value, bool):\n raise TypeError('Expected int')\n int_value = int(value)\n max_value = 2 ** cls.BITS - 1\n if not (0 <= int_value <= max_value):\n raise SSZValueError('out of range')\n return super().__new__(cls, int_value)\nclass Uint8(BaseUint):\n BITS = 8\nUint8(256)" "out of range"
766+
767+
-- Negative value rejected
768+
#eval assertPyError "class SSZValueError(Exception):\n pass\nclass BaseUint(int):\n BITS = 8\n def __new__(cls, value):\n if not isinstance(value, int) or isinstance(value, bool):\n raise TypeError('Expected int')\n int_value = int(value)\n if not (0 <= int_value <= 2 ** cls.BITS - 1):\n raise SSZValueError('out of range')\n return super().__new__(cls, int_value)\nclass Uint8(BaseUint):\n BITS = 8\nUint8(-1)" "out of range"
769+
770+
-- Bool rejected (isinstance(True, int) is True, but we check isinstance(value, bool))
771+
#eval assertPyError "class BaseUint(int):\n BITS = 8\n def __new__(cls, value):\n if not isinstance(value, int) or isinstance(value, bool):\n raise TypeError('Expected int')\n return super().__new__(cls, int(value))\nclass Uint8(BaseUint):\n BITS = 8\nUint8(True)" "Expected int"
772+
773+
-- Bitwise operations on int subclasses
774+
#eval assertPy "class BaseUint(int):\n BITS = 8\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n def __and__(self, other):\n return type(self)(super().__and__(other))\n def __or__(self, other):\n return type(self)(super().__or__(other))\n def __xor__(self, other):\n return type(self)(super().__xor__(other))\nclass Uint8(BaseUint):\n BITS = 8\na = Uint8(0b1100)\nb = Uint8(0b1010)\nprint(int(a & b))\nprint(int(a | b))\nprint(int(a ^ b))" "8\n14\n6\n"
775+
776+
-- to_bytes on int subclass instance
777+
#eval assertPy "class BaseUint(int):\n BITS = 64\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n def serialize_to_bytes(self):\n return int(self).to_bytes(self.BITS // 8, 'little')\nclass Uint64(BaseUint):\n BITS = 64\nx = Uint64(256)\nb = x.serialize_to_bytes()\nprint(len(b))\nprint(b[0])\nprint(b[1])" "8\n0\n1\n"
778+
779+
-- hash() works on int subclass instances (for Pydantic frozen models)
780+
#eval assertPy "class BaseUint(int):\n BITS = 8\n def __new__(cls, value):\n return super().__new__(cls, int(value))\n def __hash__(self):\n return hash((type(self).__name__, int(self)))\nclass Uint8(BaseUint):\n BITS = 8\nx = Uint8(42)\nh = hash(x)\nprint(isinstance(h, int))\nprint(h != 0)" "True\nTrue\n"
781+
782+
-- ClassVar accessible on subclasses
783+
#eval assertPy "class BaseUint(int):\n BITS = 0\n def __new__(cls, value):\n return super().__new__(cls, int(value))\nclass Uint8(BaseUint):\n BITS = 8\nclass Uint64(BaseUint):\n BITS = 64\nprint(Uint8.BITS)\nprint(Uint64.BITS)" "8\n64\n"
784+
785+
-- Multiple Uint subclasses
786+
#eval assertPy "class BaseUint(int):\n BITS = 0\n def __new__(cls, value):\n return super().__new__(cls, int(value))\nclass Uint8(BaseUint):\n BITS = 8\nclass Uint16(BaseUint):\n BITS = 16\nclass Uint32(BaseUint):\n BITS = 32\nclass Uint64(BaseUint):\n BITS = 64\nprint(int(Uint8(255)))\nprint(int(Uint16(1000)))\nprint(int(Uint32(100000)))\nprint(int(Uint64(2**40)))" "255\n1000\n100000\n1099511627776\n"

0 commit comments

Comments
 (0)