Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
561 changes: 551 additions & 10 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ base64 = "0.22"
crc32fast = "1.4"
flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }

# RFC 0019 — numerics and serialization.
num-bigint = "0.4"
num-integer = "0.1"
num-traits = "0.2"
num-rational = "0.4"
byteorder = "1.5"
encoding_rs = "0.8"
bzip2 = { version = "0.4", features = ["static"] }
xz2 = "0.1"
rusqlite = { version = "0.31", features = ["bundled"] }
rust_decimal = "1.36"

# Test/bench-only.
insta = { version = "1.40", features = ["yaml"] }
proptest = "1.5"
Expand Down
3 changes: 3 additions & 0 deletions crates/weavepy-compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,8 @@ weavepy-parser = { workspace = true }
indexmap = { workspace = true }
thiserror = { workspace = true }

# RFC 0019 — numerics and serialization.
num-bigint = { workspace = true }

[lints]
workspace = true
50 changes: 49 additions & 1 deletion crates/weavepy-compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,22 @@ fn format_constant(c: &Constant) -> String {
Constant::None => "None".to_owned(),
Constant::Bool(b) => if *b { "True" } else { "False" }.to_owned(),
Constant::Int(i) => i.to_string(),
Constant::BigInt(b) => b.to_string(),
Constant::Float(f) => {
if f.fract() == 0.0 && f.is_finite() {
format!("{f:.1}")
} else {
f.to_string()
}
}
Constant::Complex(real, imag) => {
if *real == 0.0 {
format!("{imag}j")
} else {
let sep = if imag.is_sign_positive() { "+" } else { "" };
format!("({real}{sep}{imag}j)")
}
}
Constant::Str(s) => format!("'{s}'"),
Constant::Bytes(_) => "b'...'".to_owned(),
Constant::Tuple(items) => {
Expand All @@ -229,25 +238,64 @@ fn format_constant(c: &Constant) -> String {
/// Includes nested [`CodeObject`]s so function definitions can carry
/// their compiled body as a constant (matching CPython's `co_consts`
/// containing nested code objects).
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone)]
pub enum Constant {
/// Python's `None`.
None,
/// Python's `True` / `False`.
Bool(bool),
/// Integer constant stored as a machine `i64`.
Int(i64),
/// Arbitrary-precision integer (RFC 0019). Stored as a
/// `num_bigint::BigInt` so the compiler can hand it to the VM
/// directly without re-parsing.
BigInt(num_bigint::BigInt),
/// Floating-point constant.
Float(f64),
/// Complex literal `(real, imag)` (RFC 0019).
Complex(f64, f64),
/// String constant.
Str(String),
/// Bytes constant, held as raw octets.
Bytes(Vec<u8>),
/// Tuple whose elements are themselves constants.
Tuple(Vec<Constant>),
/// Nested code object, e.g. the compiled body of a function
/// definition. Boxed so the enum does not embed a `CodeObject` inline.
Code(Box<CodeObject>),
/// The `Ellipsis` constant.
Ellipsis,
}

impl PartialEq for Constant {
fn eq(&self, other: &Self) -> bool {
use Constant as C;
match (self, other) {
(C::None, C::None) => true,
(C::Bool(a), C::Bool(b)) => a == b,
(C::Int(a), C::Int(b)) => a == b,
(C::BigInt(a), C::BigInt(b)) => a == b,
(C::Float(a), C::Float(b)) => a.to_bits() == b.to_bits(),
(C::Complex(ar, ai), C::Complex(br, bi)) => {
ar.to_bits() == br.to_bits() && ai.to_bits() == bi.to_bits()
}
(C::Str(a), C::Str(b)) => a == b,
(C::Bytes(a), C::Bytes(b)) => a == b,
(C::Tuple(a), C::Tuple(b)) => a == b,
(C::Code(_), C::Code(_)) => false,
(C::Ellipsis, C::Ellipsis) => true,
// Cross-type equality is intentionally rejected so that
// the const-pool deduplication preserves CPython's
// `1 != 1.0` semantics for interned constants.
_ => false,
}
}
}

impl From<AstConstant> for Constant {
fn from(c: AstConstant) -> Self {
match c {
AstConstant::None => Self::None,
AstConstant::Bool(b) => Self::Bool(b),
AstConstant::Int(i) => Self::Int(i),
AstConstant::BigInt(s) => match s.parse::<num_bigint::BigInt>() {
Ok(b) => Self::BigInt(b),
// The AST parser only produces a `BigInt` variant when
// the string is well-formed; round-tripping should be
// total. Defensive fallback to zero.
Err(_) => Self::Int(0),
},
AstConstant::Complex(real, imag) => Self::Complex(real, imag),
AstConstant::Float(f) => Self::Float(f),
AstConstant::Str(s) => Self::Str(s),
AstConstant::Bytes(b) => Self::Bytes(b),
Expand Down
3 changes: 3 additions & 0 deletions crates/weavepy-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,8 @@ categories.workspace = true
weavepy-lexer = { workspace = true }
thiserror = { workspace = true }

# RFC 0019 — numerics and serialization.
num-bigint = { workspace = true }

[lints]
workspace = true
22 changes: 22 additions & 0 deletions crates/weavepy-parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,16 @@ pub enum Constant {
None,
Bool(bool),
Int(i64),
/// Arbitrary-precision integer literal (RFC 0019). The compiler
/// emits `Object::Long` for these. Represented as decimal-string
/// + sign so the AST stays cheap to clone and `PartialEq`-able
/// without reaching for `num_bigint::BigInt` here.
BigInt(String),
Float(f64),
/// `(real, imag)` for complex literals (RFC 0019). The lexer
/// already accepts the trailing `j`/`J`; the parser routes the
/// numeric body here.
Complex(f64, f64),
Str(String),
Bytes(Vec<u8>),
Tuple(Vec<Constant>),
Expand Down Expand Up @@ -1420,6 +1429,19 @@ fn dump_constant(out: &mut String, c: &Constant) {
Constant::None => out.push_str("None"),
Constant::Bool(b) => out.push_str(if *b { "True" } else { "False" }),
Constant::Int(i) => out.push_str(&i.to_string()),
Constant::BigInt(repr) => out.push_str(repr),
Constant::Complex(real, imag) => {
if *real == 0.0 {
out.push_str(&format!("{imag}j"));
} else {
out.push('(');
out.push_str(&format!("{real}"));
if imag.is_sign_positive() {
out.push('+');
}
out.push_str(&format!("{imag}j)"));
}
}
Constant::Float(f) => {
// Match CPython repr style for common floats; full
// parity is out of scope for the slice.
Expand Down
103 changes: 80 additions & 23 deletions crates/weavepy-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1296,6 +1296,12 @@ impl<'src> Parser<'src> {
let value = match value {
Constant::Int(i) => Constant::Int(-i),
Constant::Float(f) => Constant::Float(-f),
Constant::BigInt(s) => Constant::BigInt(if let Some(stripped) = s.strip_prefix('-') {
stripped.to_owned()
} else {
format!("-{s}")
}),
Constant::Complex(real, imag) => Constant::Complex(-real, -imag),
other => other,
};
return Ok(Expr {
Expand Down Expand Up @@ -3165,40 +3171,91 @@ fn decode_bytes_body(s: &str, raw: bool) -> Result<Vec<u8>, String> {
}

fn parse_number(lex: &str) -> Result<Constant, String> {
use num_bigint::BigInt;

let cleaned: String = lex.chars().filter(|c| *c != '_').collect();

// Imaginary suffix: peel `j`/`J` and parse the body as a float.
if cleaned.ends_with('j') || cleaned.ends_with('J') {
return Err("complex numbers not supported in slice (see RFC 0001)".to_owned());
let body = &cleaned[..cleaned.len() - 1];
let imag: f64 = body
.parse()
.map_err(|e: std::num::ParseFloatError| e.to_string())?;
return Ok(Constant::Complex(0.0, imag));
}
if let Some(rest) = cleaned
.strip_prefix("0x")
.or_else(|| cleaned.strip_prefix("0X"))
{
let n = i64::from_str_radix(rest, 16).map_err(|e| e.to_string())?;
return Ok(Constant::Int(n));

// Integer literal in a non-decimal radix.
let try_radix = |prefix_lo: &str, prefix_hi: &str, radix: u32| -> Option<&str> {
cleaned
.strip_prefix(prefix_lo)
.or_else(|| cleaned.strip_prefix(prefix_hi))
.map(|r| {
let _ = radix;
r
})
};
if let Some(rest) = try_radix("0x", "0X", 16) {
return parse_radix_int(rest, 16);
}
if let Some(rest) = cleaned
.strip_prefix("0o")
.or_else(|| cleaned.strip_prefix("0O"))
{
let n = i64::from_str_radix(rest, 8).map_err(|e| e.to_string())?;
return Ok(Constant::Int(n));
if let Some(rest) = try_radix("0o", "0O", 8) {
return parse_radix_int(rest, 8);
}
if let Some(rest) = cleaned
.strip_prefix("0b")
.or_else(|| cleaned.strip_prefix("0B"))
{
let n = i64::from_str_radix(rest, 2).map_err(|e| e.to_string())?;
return Ok(Constant::Int(n));
if let Some(rest) = try_radix("0b", "0B", 2) {
return parse_radix_int(rest, 2);
}

// Float literal.
let has_float_marker = cleaned.contains('.') || cleaned.contains('e') || cleaned.contains('E');
if has_float_marker {
let f: f64 = cleaned
.parse()
.map_err(|e: std::num::ParseFloatError| e.to_string())?;
return Ok(Constant::Float(f));
}
let n: i64 = cleaned
.parse()
.map_err(|e: std::num::ParseIntError| e.to_string())?;
Ok(Constant::Int(n))

// Decimal integer; promote to BigInt on overflow.
if let Ok(n) = cleaned.parse::<i64>() {
return Ok(Constant::Int(n));
}
let big: BigInt = cleaned.parse().map_err(|_| "invalid integer literal".to_owned())?;
if let Some(small) = big_to_i64(&big) {
return Ok(Constant::Int(small));
}
Ok(Constant::BigInt(big.to_string()))
}

/// Parses the digits of a prefixed integer literal.
///
/// `rest` is the literal text with its `0x` / `0o` / `0b` prefix already
/// removed and `radix` is the matching base. Values that fit in `i64`
/// become `Constant::Int`; larger ones become `Constant::BigInt` carrying
/// the value rendered in decimal.
///
/// # Errors
///
/// Returns `"invalid integer literal"` when `rest` is not a valid number
/// in the given radix.
fn parse_radix_int(rest: &str, radix: u32) -> Result<Constant, String> {
    use num_bigint::BigInt;

    match i64::from_str_radix(rest, radix) {
        // Fast path: the literal fits in a machine word.
        Ok(word) => Ok(Constant::Int(word)),
        // Overflow or malformed input; the arbitrary-precision parser
        // tells the two apart.
        Err(_) => {
            let wide = BigInt::parse_bytes(rest.as_bytes(), radix)
                .ok_or_else(|| "invalid integer literal".to_owned())?;
            Ok(match big_to_i64(&wide) {
                Some(word) => Constant::Int(word),
                None => Constant::BigInt(wide.to_string()),
            })
        }
    }
}

fn big_to_i64(b: &num_bigint::BigInt) -> Option<i64> {
use num_bigint::Sign;
let (sign, digits) = b.to_u64_digits();
match digits.len() {
0 => Some(0),
1 => {
let v = digits[0];
match sign {
Sign::Plus | Sign::NoSign => i64::try_from(v).ok(),
Sign::Minus => {
if v == (i64::MAX as u64) + 1 {
Some(i64::MIN)
} else {
i64::try_from(v).ok().map(|n| -n)
}
}
}
}
_ => None,
}
}
12 changes: 12 additions & 0 deletions crates/weavepy-vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,17 @@ base64 = { workspace = true }
crc32fast = { workspace = true }
flate2 = { workspace = true }

# RFC 0019 — numerics and serialization.
num-bigint = { workspace = true }
num-integer = { workspace = true }
num-traits = { workspace = true }
num-rational = { workspace = true }
byteorder = { workspace = true }
encoding_rs = { workspace = true }
bzip2 = { workspace = true }
xz2 = { workspace = true }
rusqlite = { workspace = true }
rust_decimal = { workspace = true }

[lints]
workspace = true
28 changes: 28 additions & 0 deletions crates/weavepy-vm/src/builtin_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ impl BuiltinTypes {
}
}
}
// RFC 0019 — install numeric/bytes class methods.
install_numeric_class_methods(&bt);
bt
}

Expand Down Expand Up @@ -1164,3 +1166,29 @@ pub fn instance_is_subclass(obj: &Object, cls: &TypeObject) -> bool {
_ => false,
}
}

/// RFC 0019 — registers the alternate constructors of the numeric and
/// bytes types.
///
/// Inserts `int.from_bytes`, `bytes.fromhex`, `bytearray.fromhex` and
/// `float.fromhex` into the corresponding type dictionaries. Each builtin
/// is wrapped in `Object::ClassMethod` so that a call such as
/// `int.from_bytes(b'\x00\xff', 'big')` resolves on the type itself
/// rather than through instance method dispatch.
fn install_numeric_class_methods(bt: &BuiltinTypes) {
    use crate::object::BuiltinFn;

    /// Signature shared by the native entry points registered below.
    type NativeFn = fn(&[Object]) -> Result<Object, RuntimeError>;

    // (owning type, attribute name, native implementation)
    let registrations: [(&Rc<TypeObject>, &'static str, NativeFn); 4] = [
        (&bt.int_, "from_bytes", crate::builtins::b_int_from_bytes_cls),
        (&bt.bytes_, "fromhex", crate::builtins::b_bytes_fromhex_cls),
        (&bt.bytearray_, "fromhex", crate::builtins::b_bytearray_fromhex_cls),
        (&bt.float_, "fromhex", crate::builtins::b_float_fromhex_cls),
    ];

    for (owner, name, native) in registrations {
        let callable = Object::Builtin(Rc::new(BuiltinFn {
            name,
            call: Box::new(native),
        }));
        // The `classmethod` wrapper makes descriptor binding skip the
        // instance and route through the class.
        let class_method = Object::ClassMethod(Rc::new(callable));
        owner
            .dict
            .borrow_mut()
            .insert(DictKey(Object::from_static(name)), class_method);
    }
}
Loading
Loading