Skip to content
3 changes: 2 additions & 1 deletion src/lang/format.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "table/sym.h"
#include "lang/eval.h"
#include "ops/ops.h" /* RAY_LAZY, ray_lazy_materialize */
#include "ops/internal.h"
#include "mem/heap.h"
#include <stdarg.h>
#include <stdio.h>
Expand Down Expand Up @@ -188,7 +189,7 @@ static void fmt_i64(fmt_buf_t* b, int64_t val) {
}

static void fmt_f64(fmt_buf_t* b, double val) {
if (val == -0.0 && signbit(val)) val = 0.0; /* normalize -0.0 */
val = clear_neg_zero(val);
if (val == 0.0) {
/* Zero: format as "0.0" (after trailing-zero strip) */
char tmp[16];
Expand Down
16 changes: 11 additions & 5 deletions src/ops/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
#include "core/types.h"
#include "io/csv.h"
#include "ops/ops.h"

/* Collapse IEEE 754 negative zero onto positive zero.
 * The test is made on the raw 64-bit pattern rather than with a
 * floating-point comparison; every value other than -0.0 (including
 * NaN and +0.0) is returned unchanged.
 * NOTE(review): ops/internal.h defines an identical static helper; if
 * that header is ever included in this translation unit the two
 * definitions will clash — confirm and drop one of them. */
static inline double clear_neg_zero(double v) {
    const uint64_t neg_zero_bits = UINT64_C(0x8000000000000000);
    uint64_t raw;
    memcpy(&raw, &v, sizeof raw);
    return (raw == neg_zero_bits) ? 0.0 : v;
}
#include "ops/hash.h"
#include "store/part.h"
#include "store/splay.h"
Expand Down Expand Up @@ -91,7 +97,7 @@ void ray_lang_print(FILE* fp, ray_t* val) {
case -RAY_I64: fprintf(fp, "%ld", (long)val->i64); break;
case -RAY_F64: {
double fv = val->f64;
if (fv == 0.0 && signbit(fv)) fv = 0.0;
fv = clear_neg_zero(fv);
fprintf(fp, "%g", fv);
break;
}
Expand Down Expand Up @@ -177,7 +183,7 @@ static char* fmt_interpolate(const char* fmt, size_t flen, ray_t** args, int64_t
tlen = snprintf(tmp, sizeof(tmp), "%ld", (long)a->i64);
} else if (a->type == -RAY_F64) {
double fv = a->f64;
if (fv == 0.0 && signbit(fv)) fv = 0.0;
fv = clear_neg_zero(fv);
tlen = snprintf(tmp, sizeof(tmp), "%g", fv);
} else if (a->type == -RAY_BOOL) {
tlen = snprintf(tmp, sizeof(tmp), "%s", a->b8 ? "true" : "false");
Expand Down Expand Up @@ -1343,7 +1349,7 @@ ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val) {
}
if (val->type == -RAY_F64) {
double fv = val->f64;
if (fv == 0.0 && signbit(fv)) fv = 0.0;
fv = clear_neg_zero(fv);
char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%g", fv);
return ray_str(buf, (size_t)n2);
}
Expand Down Expand Up @@ -1399,7 +1405,7 @@ ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val) {
else if (val->type == -RAY_I32) n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i32);
else if (val->type == -RAY_F64) {
double fv = val->f64;
if (fv == 0.0 && signbit(fv)) fv = 0.0;
fv = clear_neg_zero(fv);
n2 = snprintf(buf, sizeof(buf), "%.17g", fv);
}
else n2 = snprintf(buf, sizeof(buf), "%lld", (long long)as_i64(val));
Expand Down Expand Up @@ -2556,7 +2562,7 @@ ray_t* ray_group_fn(ray_t* x) {
idx_vecs[gi_nan] = ray_vec_append(idx_vecs[gi_nan], &i);
continue;
}
if (f == 0.0) f = 0.0; /* canonicalise -0.0 → +0.0 */
f = clear_neg_zero(f);
memcpy(&v, &f, sizeof(v));
} else
v = i;
Expand Down
15 changes: 15 additions & 0 deletions src/ops/expr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,7 @@ static void propagate_nulls_binary(ray_t* lhs, ray_t* rhs, ray_t* result,
ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) {
(void)g;
if (!input || RAY_IS_ERR(input)) return input;
if (!ray_is_vec(input)) return ray_error("type", NULL);
int64_t len = input->len;
int8_t in_type = input->type;
int8_t out_type = op->out_type;
Expand Down Expand Up @@ -2027,6 +2028,20 @@ ray_t* exec_elementwise_binary(ray_graph_t* g, ray_op_t* op, ray_t* lhs, ray_t*
}
}

/* Reject string atom in arithmetic context (only comparisons are valid). */
{
bool l_atom_str = (l_scalar && lhs->type == -RAY_STR);
bool r_atom_str = (r_scalar && rhs->type == -RAY_STR);
if (l_atom_str || r_atom_str) {
uint16_t opc = op->opcode;
bool is_cmp = (opc >= OP_EQ && opc <= OP_GE);
if (!is_cmp && !RAY_IS_SYM(lhs->type) && !RAY_IS_SYM(rhs->type)) {
ray_release(result);
return ray_error("type", NULL);
}
}
}

/* SYM vs STR comparison: resolve string constant to intern ID so we
can compare numerically against SYM intern indices.
ray_sym_find returns -1 if string not in table → no match. */
Expand Down
29 changes: 28 additions & 1 deletion src/ops/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,34 @@ ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel) {
ray_t* col = ray_table_get_col_idx(tbl, c);
col_names[c] = ray_table_col_name(tbl, c);
if (!col || RAY_IS_ERR(col)) { new_cols[c] = NULL; continue; }
if (col->type == RAY_MAPCOMMON) { new_cols[c] = NULL; continue; }
if (col->type == RAY_MAPCOMMON) {
    /* Flatten a MAPCOMMON column for the selected rows.
     * The column's data is read as an array of ray_t* slots: slot 0 (kv)
     * supplies one value per partition and slot 1 (rc) supplies int64
     * per-partition row counts. For every selected row the owning
     * partition is found by accumulating the counts, and that partition's
     * value is copied into a fresh vector of kv's type. */

    /* Validate the slot count BEFORE reading the slots, so a short
     * column is rejected without touching memory past its data. */
    if (col->len < 2) { new_cols[c] = NULL; continue; }
    ray_t** mc_ptrs = (ray_t**)ray_data(col);
    ray_t* kv = mc_ptrs[0];
    ray_t* rc = mc_ptrs[1];
    if (!kv || !rc) { new_cols[c] = NULL; continue; }
    int64_t n_parts = kv->len;
    /* With no partition values there is nothing to copy from; the
     * memcpy below would otherwise read element 0 of an empty vector. */
    if (n_parts <= 0 && pass_count > 0) { new_cols[c] = NULL; continue; }
    int8_t kv_type = kv->type;
    size_t esz = (size_t)ray_sym_elem_size(kv_type, kv->attrs);
    const char* kdata = (const char*)ray_data(kv);
    /* NOTE(review): assumes rc holds at least n_parts - 1 counts — confirm. */
    const int64_t* counts = (const int64_t*)ray_data(rc);
    ray_t* flat = ray_vec_new(kv_type, pass_count);
    if (!flat || RAY_IS_ERR(flat)) { new_cols[c] = NULL; continue; }
    flat->len = pass_count;
    char* out_mc = (char*)ray_data(flat);
    for (int64_t i = 0; i < pass_count; i++) {
        int64_t row_i = match_idx[i];
        int64_t cum = 0;
        int64_t pi = 0;
        /* Linear scan over cumulative counts; the last partition acts as
         * the catch-all for any row index beyond the preceding totals. */
        for (; pi < n_parts - 1; pi++) {
            cum += counts[pi];
            if (row_i < cum) break;
        }
        memcpy(out_mc + (size_t)i * esz, kdata + (size_t)pi * esz, esz);
    }
    new_cols[c] = flat;
    valid_ncols++;
    continue;
}
int8_t ct = RAY_IS_PARTED(col->type)
? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type;
uint8_t ca = 0;
Expand Down
36 changes: 27 additions & 9 deletions src/ops/group.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ static void cd_hist_fn(void* ctx, uint32_t worker_id,
for (int64_t i = start; i < end; i++) {
double fv = d[i];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
int64_t val;
memcpy(&val, &fv, sizeof(int64_t));
uint64_t h = (uint64_t)val * CD_HASH_K1;
Expand Down Expand Up @@ -540,7 +540,7 @@ static void cd_scatter_fn(void* ctx, uint32_t worker_id,
for (int64_t i = start; i < end; i++) {
double fv = d[i];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
int64_t val;
memcpy(&val, &fv, sizeof(int64_t));
uint64_t h = (uint64_t)val * CD_HASH_K1;
Expand Down Expand Up @@ -592,7 +592,7 @@ static int64_t cd_seq_count(int8_t in_type, uint8_t in_attrs,
if (in_type == RAY_F64) {
double fv = ((const double*)base)[i];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
memcpy(&val, &fv, sizeof(int64_t));
} else {
val = read_col_i64(base, i, in_type, in_attrs);
Expand Down Expand Up @@ -942,7 +942,7 @@ static inline int64_t cdpg_read(const void* base, int64_t r,
if (in_type == RAY_F64) {
double fv = ((const double*)base)[r];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
int64_t v;
memcpy(&v, &fv, sizeof(int64_t));
return v;
Expand Down Expand Up @@ -1288,7 +1288,7 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid,
if (gid < 0 || gid >= n_groups) continue;
double fv = d[r];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
int64_t v;
memcpy(&v, &fv, sizeof(int64_t));
CD_INSERT(v);
Expand Down Expand Up @@ -1338,7 +1338,7 @@ ray_t* ray_count_distinct_per_group(ray_t* src, const int64_t* row_gid,
if (in_type == RAY_F64) {
double fv = ((double*)base)[r];
if (fv != fv) fv = (double)NAN;
else if (fv == 0.0) fv = 0.0;
else fv = clear_neg_zero(fv);
memcpy(&row_val, &fv, sizeof(int64_t));
} else {
row_val = read_col_i64(base, r, in_type, src->attrs);
Expand Down Expand Up @@ -5145,13 +5145,21 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl,
agg_owned[a] = 1;
goto resolve_ins2;
}
if (vec && RAY_IS_ERR(vec)) ray_release(vec);
}
/* Fallback: full recursive evaluation */
ray_t* saved_table = g->table;
g->table = tbl;
ray_t* vec = exec_node(g, agg_input_op);
g->table = saved_table;
if (vec && !RAY_IS_ERR(vec)) {
if (vec && RAY_IS_ERR(vec)) {
for (uint8_t i = 0; i < a; i++)
{ if (agg_owned[i] && agg_vecs[i]) ray_release(agg_vecs[i]); if (agg_owned2[i] && agg_vecs2[i]) ray_release(agg_vecs2[i]); }
for (uint8_t k = 0; k < n_keys; k++)
if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]);
return vec;
}
if (vec) {
agg_vecs[a] = vec;
agg_owned[a] = 1;
}
Expand All @@ -5177,14 +5185,24 @@ ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl,
agg_vecs2[a] = vec;
agg_owned2[a] = 1;
compiled2 = 1;
} else if (vec) {
ray_release(vec);
}
}
if (!compiled2) {
ray_t* saved_table = g->table;
g->table = tbl;
ray_t* vec = exec_node(g, agg_input_op2);
g->table = saved_table;
if (vec && !RAY_IS_ERR(vec)) {
if (vec && RAY_IS_ERR(vec)) {
if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]);
for (uint8_t i = 0; i < a; i++)
{ if (agg_owned[i] && agg_vecs[i]) ray_release(agg_vecs[i]); if (agg_owned2[i] && agg_vecs2[i]) ray_release(agg_vecs2[i]); }
for (uint8_t k = 0; k < n_keys; k++)
if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]);
return vec;
}
if (vec) {
agg_vecs2[a] = vec;
agg_owned2[a] = 1;
}
Expand Down Expand Up @@ -9332,7 +9350,7 @@ static inline int64_t grpt_key_read(const void* base, int8_t t, int64_t row) {
switch (t) {
case RAY_F64: {
double v; memcpy(&v, (const char*)base + (size_t)row*8, 8);
if (v == 0.0) v = 0.0; /* normalize -0.0 → +0.0 to match hash */
v = clear_neg_zero(v);
int64_t bits; memcpy(&bits, &v, 8); return bits;
}
case RAY_I64: case RAY_TIMESTAMP:
Expand Down
3 changes: 2 additions & 1 deletion src/ops/idxop.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*/

#include "idxop.h"
#include "ops/internal.h"
#include "mem/heap.h"
#include "mem/cow.h"
#include "vec/vec.h"
Expand Down Expand Up @@ -53,7 +54,7 @@ static uint64_t numeric_key_word(const uint8_t* base, int8_t type, int64_t i) {
double v;
if (es == 4) { float t; memcpy(&t, base + i*4, 4); v = (double)t; }
else { memcpy(&v, base + i*8, 8); }
if (v == 0.0) v = 0.0; /* canonicalise -0.0 -> +0.0 */
v = clear_neg_zero(v);
if (v != v) { /* NaN: per-row bucket via row hash */
return (uint64_t)i * 0x9E3779B97F4A7C15ULL;
}
Expand Down
9 changes: 9 additions & 0 deletions src/ops/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1143,4 +1143,13 @@ static inline void par_finalize_nulls(ray_t* vec) {
}
}

/* Canonicalise IEEE 754 -0.0 → +0.0 by inspecting the raw bit pattern.
 * A floating-point test such as `if (f == 0) f = 0` can be folded away
 * under -fno-signed-zeros; comparing the 64-bit image cannot.
 * Intended for output / hash-key boundaries only — not hot SIMD loops.
 * Any input other than -0.0 is returned untouched. */
static inline double clear_neg_zero(double v) {
    const uint64_t neg_zero_bits = UINT64_C(0x8000000000000000);
    uint64_t raw;
    memcpy(&raw, &v, sizeof raw);
    return (raw == neg_zero_bits) ? 0.0 : v;
}

#endif /* RAY_EXEC_INTERNAL_H */
2 changes: 1 addition & 1 deletion src/ops/pivot.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) {
if (pt == RAY_F64) {
double fv;
memcpy(&fv, &pval, 8);
if (fv == 0.0 && signbit(fv)) fv = 0.0;
fv = clear_neg_zero(fv);
len = snprintf(buf, sizeof(buf), "%g", fv);
} else if (pt == RAY_BOOL) {
len = snprintf(buf, sizeof(buf), "%s", pval ? "true" : "false");
Expand Down
2 changes: 1 addition & 1 deletion src/ops/query.c
Original file line number Diff line number Diff line change
Expand Up @@ -3928,7 +3928,7 @@ static void cdpg_buf_par_fn(void* vctx, uint32_t worker_id,
int64_t r = idxs[i];
double fv = d[r];
if (has_nulls && fv != fv) continue;
if (fv == 0.0) fv = 0.0;
fv = clear_neg_zero(fv);
int64_t vbits = 0;
memcpy(&vbits, &fv, sizeof(int64_t));
CDPG_BUF_INSERT(vbits);
Expand Down
2 changes: 1 addition & 1 deletion src/ops/tblop.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ ray_t* ray_pivot_fn(ray_t** args, int64_t n) {
char buf[64]; int len = snprintf(buf, sizeof(buf), "%ld", (long)pval->i64);
col_sym = ray_sym_intern(buf, (size_t)len);
} else if (pval->type == -RAY_F64) {
double fv = pval->f64; if (fv == 0.0 && signbit(fv)) fv = 0.0;
double fv = clear_neg_zero(pval->f64);
char buf[64]; int len = snprintf(buf, sizeof(buf), "%g", fv);
col_sym = ray_sym_intern(buf, (size_t)len);
} else if (pval->type == -RAY_BOOL) {
Expand Down
17 changes: 17 additions & 0 deletions test/rfl/agg/avg.rfl
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,20 @@
;; constant vector: avg == that constant
(set K 42)
(avg (take (enlist K) 50)) -- 42.0

;; ─── edge cases: single-element / negatives / empty list ─────────────
(avg [42]) -- 42.0
(avg [-5]) -- -5.0
(avg [-10 -20 -30]) -- -20.0
(avg [0 0 0 0]) -- 0.0
;; empty list → domain error (line 314 in ray_avg_fn)
(avg (list)) !- domain
;; single-element list
(avg (list 7)) -- 7.0
;; list with all nulls → typed null F64
(nil? (avg (list 0Ni 0Ni 0Ni))) -- true
;; F64 atom passes through
(avg 3.14) -- 3.14
(avg -2.5) -- -2.5
;; null atom → typed null F64
(nil? (avg 0Nf)) -- true
22 changes: 22 additions & 0 deletions test/rfl/agg/count.rfl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,25 @@

;; til n has count n
(count (til 123)) -- 123

;; ─── edge cases: empty / single / atom / heterogeneous list ──────────
;; empty string -> count 0
(count "") -- 0
;; single-char string -> count 1
(count "x") -- 1
;; empty list (heterogeneous) -> 0
(count (list)) -- 0
;; list with elements -> ray_len
(count (list 1 "two" 'three)) -- 3
;; F64 atom -> 1
(count 3.14) -- 1
;; SYM atom -> 1
(count 'foo) -- 1
;; null atom -> 1
(count 0Ni) -- 1
;; dict (atom_eq treats dict as collection)
(count (dict ['a 'b 'c] [1 2 3])) -- 3
;; table count == nrow
(count (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) -- 5
;; empty table
(count (table [a b] (list (as 'I64 []) (as 'I64 [])))) -- 0
10 changes: 10 additions & 0 deletions test/rfl/agg/list_med_var.rfl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@

;; ─── med type error on non-list non-vec (line 519) ───────────────────
(med 'some_sym) !- type
;; Atom path hits 491; dict / table hit the trailing 519 guard
;; (not atom, not vec, not list, no parted dispatch).
(med (dict ["a" "b"] [1 2])) !- type
(med (table [k] (list [1 2 3]))) !- type

;; ─── var/stddev on list (var_stddev_core list branch: lines 593-607) ──
;; Basic: list [1 2 3 4 5], pop_var = 2.0, sample_var = 2.5
Expand Down Expand Up @@ -72,6 +76,12 @@
;; ─── var type error on non-list non-vec (line 606) ───────────────────
(var_pop 'sym_input) !- type
(dev 'sym_input) !- type
;; Dict / table fall through the atom / vec / list dispatch and land
;; on the trailing 606 guard.
(var (dict ["a" "b"] [1 2])) !- type
(var_pop (table [k] (list [1 2 3]))) !- type
(stddev (dict ["x"] [1])) !- type
(dev (table [k] (list [1 2]))) !- type

;; ─── vec_to_f64_scratch type error path (lines 475-476) ─────────────
;; SYM vec is not numeric → error from vec_to_f64_scratch.
Expand Down
14 changes: 14 additions & 0 deletions test/rfl/agg/max.rfl
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,17 @@

;; max >= min
1 -- (as 'I64 (>= (max V) (min V)))

;; ─── edge cases: single-element / all-equal / negatives / atoms ──────
(max [42]) -- 42
(max [7 7 7 7]) -- 7
(max [-1 -5 -10 0]) -- 0
(max [-5.5 -1.2 -10.3 -0.5]) -- -0.5
(max [3.14 1.41 2.71]) -- 3.14
;; max over a list with null skips the null
(max (list 5 0Ni 3)) -- 5
;; list of all-nulls → typed null
(nil? (max (list 0Ni 0Ni))) -- true
;; atom pass-through
(max 99) -- 99
(max -7) -- -7
Loading
Loading