Status: Authoritative source of truth
Versioning: Spec v1.0 (maps to ZGraph 1.x)
Scope: ZGraph library (in-memory storages, algorithms, formats, conversion, CLI). ZGraphDB is a separate paged container that reuses these formats and calls into ZGraph algorithms.
In Zig, unit tests live with their source. Integration/e2e and other suites live under
tests/*and are wired viabuild.zig.
- One trait surface: All algorithms operate across any storage (list/matrix/incidence/CSR).
- Compile-time semantics:
directed,weightedas comptime flags, no runtime branches in hot loops. - Attributes first‑class: Node/edge KV attributes (
int|float|bool|string), columnar under the hood. - Zero-copy where possible: mmap
.zgraphb, zstd-chunked blocks, CSR overlays. - Memory-bounded conversions: multiple strategies with verifiable ceilings.
- Forward-compatible formats: readers ignore unknown sections/blocks. Round-trip
.zgraph⇄.zgraphb. - Determinism: canonical iteration order and serialized output for reproducible builds.
- Safety: explicit allocators, clear ownership; synchronization in parallel builders only.
ZGraph/
├─ README.md # quickstart
├─ ZGraph.Spec.md # THIS file (canonical spec & checklist)
├─ LICENSE
├─ build.zig
├─ zig.mod
├─ docs/
│ ├─ formats/
│ │ ├─ zgraph-text-spec.md # extracted from §6.1
│ │ └─ zgraphb-binary-spec.md # extracted from §6.2
│ ├─ algorithms/*.md # explainer docs per family
│ ├─ conversion_strategies.md
│ ├─ perf_tuning.md
│ └─ safety_and_threading.md
├─ src/
│ ├─ root.zig
│ ├─ lib/
│ │ ├─ core/
│ │ │ ├─ types.zig # common types & Error sets
│ │ │ ├─ graph_like.zig # trait surface + adapters
│ │ │ ├─ attributes.zig # schema + column store + API
│ │ │ ├─ graph_storage.zig # GraphStorage(Enum..) factory
│ │ │ └─ iterators.zig # NodeIter, EdgeFromIter, AllEdgeIter
│ │ ├─ data_structures/
│ │ │ ├─ adjacency_list.zig
│ │ │ ├─ adjacency_matrix.zig
│ │ │ └─ incidence_matrix.zig
│ │ ├─ indices/
│ │ │ ├─ csr.zig # forward CSR
│ │ │ └─ csr_reverse.zig # reverse CSR
│ │ ├─ formats/
│ │ │ ├─ zgraph_text.zig # streaming reader/writer
│ │ │ └─ zgraphb.zig # chunked binary reader/writer + mmap
│ │ ├─ conversion/
│ │ │ ├─ strategies.zig # public API (ConvertStrategy, convertStorage)
│ │ │ ├─ duplicate_convert.zig
│ │ │ ├─ streamed_convert.zig
│ │ │ ├─ chunked_convert.zig
│ │ │ └─ cow_convert.zig
│ │ ├─ algorithms/
│ │ │ ├─ traversal/
│ │ │ │ ├─ bfs.zig
│ │ │ │ └─ dfs.zig
│ │ │ ├─ connectivity/
│ │ │ │ ├─ components.zig # UF + CC
│ │ │ │ └─ scc.zig # Tarjan / Kosaraju
│ │ │ ├─ shortest_paths/
│ │ │ │ ├─ dijkstra.zig
│ │ │ │ ├─ bellman_ford.zig
│ │ │ │ └─ floyd_warshall.zig
│ │ │ ├─ flow/
│ │ │ │ ├─ edmonds_karp.zig
│ │ │ │ └─ dinic.zig
│ │ │ ├─ centrality/
│ │ │ │ ├─ pagerank.zig
│ │ │ │ └─ betweenness.zig
│ │ │ └─ spectral/
│ │ │ ├─ laplacian.zig
│ │ │ └─ power_iteration.zig
│ │ ├─ io/
│ │ │ ├─ file.zig # path/FS utils
│ │ │ └─ zstd.zig # compression shim
│ │ ├─ mem/
│ │ │ ├─ alloc.zig # GPA/Arena convenience
│ │ │ └─ pool.zig # edge/node pools (optional)
│ │ └─ util/
│ │ ├─ bitset.zig
│ │ ├─ hash.zig
│ │ └─ time.zig
│ └─ cli/
│ ├─ zgraph.zig # CLI entry
│ └─ subcommands/
│ ├─ convert.zig
│ ├─ validate.zig
│ ├─ build_index.zig
│ └─ stats.zig
├─ tests/
│ ├─ integration/
│ │ ├─ convert_roundtrip_test.zig
│ │ ├─ conversion_memory_ceiling_test.zig
│ │ └─ algorithms_cross_storage_test.zig
│ ├─ e2e/
│ │ ├─ cli_convert_and_stats_test.zig
│ │ └─ cli_build_index_test.zig
│ ├─ property/
│ │ ├─ random_graph_equivalence_test.zig
│ │ └─ formats_fuzz_text_binary.zig
│ └─ data/
│ ├─ tiny/*.zgraph
│ └─ medium/*.zgraph
└─ benches/
├─ micro/*.zig
└─ macro/*.zigpub const NodeId = u64;
pub const Weight = f64; // Present only when weighted=true
pub const Semantic = struct {
directed: bool,
weighted: bool,
};
pub const GraphError = error{
OutOfMemory,
InvalidNode,
InvalidEdge,
EdgeNotFound,
EdgeAlreadyExists,
GraphNotWeighted,
ParseError,
UnsupportedVersion,
IoError,
SchemaMismatch,
Overflow,
};pub fn GraphLike(comptime directed: bool, comptime weighted: bool) type {
return struct {
// Required
pub fn nodeCount(self: *const @This()) usize;
pub fn edgeCount(self: *const @This()) usize;
pub fn hasNode(self: *const @This(), u: NodeId) bool;
pub fn hasEdge(self: *const @This(), u: NodeId, v: NodeId) bool;
pub fn addNode(self: *@This(), u: NodeId) !void;
pub fn removeNode(self: *@This(), u: NodeId) !void;
pub fn addEdge(self: *@This(), u: NodeId, v: NodeId, weight: if (weighted) Weight else void) !void;
pub fn removeEdge(self: *@This(), u: NodeId, v: NodeId) !void;
// Iteration (stable order)
pub fn nodeIter(self: *const @This()) NodeIter;
pub fn edgeFromIter(self: *const @This(), u: NodeId) EdgeFromIter;
// Optional weight accessor
pub fn getWeight(self: *const @This(), u: NodeId, v: NodeId) if (weighted) ?Weight else void;
// Attributes are attached via Attributes API (§5) and accessed by id
};
}pub const NodeIter = struct {
// next() -> ?NodeId
};
pub const EdgeFromIter = struct {
// next() -> ?struct{ dst: NodeId, weight: if (weighted) ?Weight else void }
};
pub const AllEdgeIter = struct {
// next() -> ?struct{ src: NodeId, dst: NodeId, weight: if (weighted) ?Weight else void }
};pub const GraphStorageType = enum { AdjacencyList, AdjacencyMatrix, IncidenceMatrix };
pub fn GraphStorage(comptime st: GraphStorageType, comptime directed: bool, comptime weighted: bool) type;Each storage implements the GraphLike interface via thin adapters (or directly match the surface).
pub const ValueType = enum { Int, Float, Bool, String };
pub const Value = union(ValueType) {
Int: i64,
Float: f64,
Bool: bool,
String: u32, // interned string id (0xFFFFFFFF == null)
};
pub const ColumnDesc = struct { name: []const u8, ty: ValueType };
pub const Schema = struct {
node: []const ColumnDesc,
edge: []const ColumnDesc,
};- Node columns: arrays keyed by node index (sparse map for arbitrary NodeId → dense index mapping).
- Edge columns: arrays keyed by edge ordinal (storage-provided stable edge id) or
(src,dst)mapping. - Strings: UTF‑8 table with dedup; id
0xFFFFFFFF= null.
pub const Attributes = struct {
pub fn init(alloc: std.mem.Allocator, schema: Schema) !Attributes;
pub fn deinit(self: *Attributes) void;
// Node attributes
pub fn setNode(self: *Attributes, node: NodeId, key: []const u8, val: Value) !void;
pub fn getNode(self: *const Attributes, node: NodeId, key: []const u8) ?Value;
// Edge attributes (by endpoints; storage adapter resolves edge id)
pub fn setEdge(self: *Attributes, src: NodeId, dst: NodeId, key: []const u8, val: Value) !void;
pub fn getEdge(self: *const Attributes, src: NodeId, dst: NodeId, key: []const u8) ?Value;
// String table
pub fn intern(self: *Attributes, s: []const u8) !u32;
pub fn lookup(self: *const Attributes, id: u32) ?[]const u8;
};pub const AttrFilter = struct {
// Example: key="active", op="==", val=true
key: []const u8,
op: enum { Eq, Ne, Lt, Le, Gt, Ge, Exists },
val: ?Value = null,
};
// Applied via iterator adaptors:
pub fn filteredNodeIter(g: anytype, attrs: *const Attributes, filter: AttrFilter) NodeIter;Header (required):
#!zgraph 1
graph = directed|undirected
weights = weighted|unweighted
schema.node = name:string,group:int,active:bool
schema.edge = label:string,capacity:floatSections (order-free, repeatable, append behavior):
[NODES]rows:id, <node-props-by-schema>[EDGES]- if weighted:
src, dst, weight, <edge-props> - else:
src, dst, <edge-props>
- if weighted:
[META]free-formkey=valuelines (ignored by reader unless known)[INDEX]hints (optional), ignored by reader[TAGS]free-form labels
CSV rules: RFC4180-ish; quoted fields with ""; empty -> unset.
Reader/Writer API (formats/zgraph_text.zig):
pub const TextOptions = struct { canonical: bool = true };
pub fn read(alloc: Allocator, rdr: anytype) !struct{
semantic: Semantic,
schema: Schema,
nodes: []NodeId,
edges: []struct{ src: NodeId, dst: NodeId, weight: ?Weight },
attrs: Attributes,
};
pub fn write(alloc: Allocator, w: anytype, g: anytype, attrs: *const Attributes, opts: TextOptions) !void;Header:
- Magic:
ZGB1 - Version: u16
- Flags: bitfield (directed, weighted, varint_ids, has_indices, compression)
- Offsets to blocks; global CRC32
Blocks (individually zstd-compressed if enabled):
- StringTable: contiguous UTF‑8, u32 ids
- SchemaBlock: column descriptors (node/edge)
- NodeTable: COUNT, varint NodeId list (or fixed u64), optional NodeId→dense map
- EdgeTable: COUNT, varint src, varint dst,
[f64 weight]if weighted - NodeColumns / EdgeColumns: per-column typed vectors
- Indices (optional): CSR, ReverseCSR, DegreeTable
- Meta: key/value table
Reader/Writer API (formats/zgraphb.zig):
pub const BinaryOptions = struct {
compression: enum { none, zstd } = .zstd,
block_size: usize = 1 << 20, // 1 MiB
mmap: bool = true,
};
pub fn read(alloc: Allocator, rdr_or_path: anytype) !struct{
semantic: Semantic,
schema: Schema,
provider: ChunkProvider, // zero-copy block access if mmap
attrs: AttributesView, // read-only view
edge_index: ?CSRView,
};
pub fn write(alloc: Allocator, w_or_path: anytype, g: anytype, attrs: *const Attributes, opts: BinaryOptions) !void;Forward compatibility: unknown blocks skipped; offsets allow appending blocks; version gates behavior.
pub const ConvertStrategy = enum { Duplicate, Streamed, Chunked, CopyOnWrite };
pub const ConvertOpts = struct {
max_bytes_in_flight: usize = 0, // 0 = auto
chunk_nodes: usize = 0, // only for Chunked
destroy_source: bool = false, // allowed for Streamed
};
pub fn convertStorage(
alloc: Allocator,
comptime Src: type,
comptime dst_type: GraphStorageType,
comptime directed: bool,
comptime weighted: bool,
src: *Src,
strategy: ConvertStrategy,
opts: ConvertOpts,
) !anytype; // returns destination storage- Duplicate: peak ≈ |src| + |dst|; no source mutation.
- Streamed: peak bounded by
max(chunk, dst_window); may mutate/destroy source ifdestroy_source=true. - Chunked: shard nodes; parallelizable; peak ≈ |chunk| + overhead.
- CopyOnWrite: lazy migration via adapter; background compaction optional.
- Node/edge/weight parity after conversion.
- Memory ceilings verified under synthetic big graphs (integration tests).
pub const CSR = struct {
// row_offsets.len = nodeCount + 1
row_offsets: []usize,
col_indices: []NodeId,
weights: if (weighted) ?[]Weight else void,
pub fn build(alloc: Allocator, g: anytype) !CSR;
pub fn deinit(self: *CSR, alloc: Allocator) void;
pub fn neighbors(self: *const CSR, u: NodeId) []const NodeId; // O(1) slice
};pub const CSRReverse = struct {
// same shape as CSR but on incoming edges
pub fn build(alloc: Allocator, g: anytype) !CSRReverse;
};pub const DegreeTable = struct {
in_deg: []u32, // or omitted if undirected
out_deg: []u32,
pub fn build(alloc: Allocator, g: anytype) !DegreeTable;
};Persistence: optional blocks in .zgraphb with checksums; rebuilt if absent or stale.
pub fn bfs(alloc: Allocator, g: anytype, src: NodeId) ![]NodeId;
pub fn dfs_preorder(alloc: Allocator, g: anytype, src: NodeId) ![]NodeId;
pub fn connected_components(alloc: Allocator, g: anytype) ![]u32; // label per nodepub fn dijkstra(alloc: Allocator, g: anytype, src: NodeId) !struct{ dist: []f64, prev: []?NodeId };
pub fn bellman_ford(alloc: Allocator, g: anytype, src: NodeId) !struct{ dist: []f64, prev: []?NodeId, has_neg_cycle: bool };
pub fn floyd_warshall(alloc: Allocator, g: anytype) ![][]f64;pub fn strongly_connected(alloc: Allocator, g: anytype) ![]u32; // component id per nodepub fn edmonds_karp(alloc: Allocator, g: anytype, s: NodeId, t: NodeId, capacity_key: []const u8) !f64;
pub fn dinic(alloc: Allocator, g: anytype, s: NodeId, t: NodeId, capacity_key: []const u8) !f64;pub fn pagerank(alloc: Allocator, g: anytype, damping: f64, iters: usize, tol: f64) ![]f64;
pub fn betweenness(alloc: Allocator, g: anytype, normalized: bool) ![]f64;pub fn laplacian(alloc: Allocator, g: anytype) !struct{ L: anytype };
pub fn power_iteration(alloc: Allocator, A: anytype, iters: usize, tol: f64) !struct{ eigvec: []f64, eigval: f64 };Choice of layout at runtime: decision table maps algorithm → preferred index/storage; auto-build CSR if enabled in options.
zgraph convert --in <path> --out <path> [--to <list|matrix|incidence>] [--binary|--text] [--compression zstd|none] [--strategy duplicate|streamed|chunked|cow] [--max-bytes N]
zgraph validate --in <path>
zgraph build-index --in <path> [--csr] [--reverse]
zgraph stats --in <path> [--json]Exit codes: non-zero on invalid input/failed validation.
JSON stats: nodes, edges, degree histograms, components, optional timings.
- Unit tests with sources (Zig convention) — storages, formats readers/writers, algorithms.
- Integration tests (
tests/integration) — round trips, cross-storage equivalence, conversion invariants, memory ceilings. - E2E tests (
tests/e2e) — CLI flows. - Property tests (
tests/property) — random graphs, fuzz text/binary parsers. - Data fixtures (
tests/data) — tiny & medium graphs. build.zigprovides:zig build test,zig build test-all, and filtered suites.
- Parse
.zgraphb: ≥ 2 GB/s per core when uncompressed; with zstd, ≥ 700 MB/s/core (dict-enabled). - CSR neighbor iteration: contiguous scans; no allocations after build.
- Conversion: streamed peak memory within ±5% of configured ceiling on synthetic workload.
- Parallel build (incidence/CSR): ≥ 80% CPU utilization on 8 cores.
- Thread safety: graph mutation not thread-safe unless explicitly documented (e.g., parallel builders).
- Readers (mmap
.zgraphb) are thread-safe; shared immutable structures. - Allocator knobs: GPA, Arena, Page; document trade-offs (docs/perf_tuning.md).
.zgraphis tolerant: unknown sections ignored..zgraphbblocks are skippable; new blocks may be appended; version gates semantics.- Idempotent round-trips: text ⇄ binary ⇄ in-memory produce equivalent graphs (counts, edges, weights, attributes).
- Algorithms return identical results across storages (given same semantics and adequate precision).
- Conversions preserve edges/weights/attributes; indices rebuild or persist.
- Formats round-trip with no data loss; unknown data preserved where spec allows.
- CLI exposes all core flows; non-zero exit for invalid inputs; JSON stats match library queries.
- Documentation & examples compile; decision tables match implementation.
- GraphLike surface + iterators (adapters for all storages).
- Attributes: schema + column store + API + tests.
.zgraph: streaming reader/writer + round-trip tests..zgraphb: chunked+zstd + mmap reader + parity tests.- Conversion strategies: Duplicate → Streamed → Chunked → COW (+ ceiling tests).
- CSR & Reverse CSR (optional overlays) + degree table + persistence blocks.
- Algorithms unification: traversal → shortest paths → SCC → flow → centrality → spectral.
- CLI: convert / validate / build-index / stats.
- Property/fuzz/benches/CI + docs (conversion, perf, safety).
const GS = @import("src/lib/core/graph_storage.zig").GraphStorage;
const GL = @import("src/lib/core/graph_like.zig");
const Attr = @import("src/lib/core/attributes.zig");
const Cvt = @import("src/lib/conversion/strategies.zig");
const FTxt = @import("src/lib/formats/zgraph_text.zig");
const FBin = @import("src/lib/formats/zgraphb.zig");
const CSR = @import("src/lib/indices/csr.zig");
// Example:
const Storage = GS(.AdjacencyList, true, true);
var g = Storage.init(alloc);
defer g.deinit();
try g.addEdge(1, 2, 3.5);
var attrs = try Attr.Attributes.init(alloc, .{
.node = &[_]Attr.ColumnDesc{ .{ .name="name", .ty=.String } },
.edge = &[_]Attr.ColumnDesc{ .{ .name="capacity", .ty=.Float } },
});
defer attrs.deinit();
try attrs.setEdge(1, 2, "capacity", .{ .Float=10.0 });
try FTxt.write(alloc, stdout, g, &attrs, .{});