Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
369 changes: 369 additions & 0 deletions docs/distributed_sequences.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions include/spock_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ extern const char *const skip_extension[];
extern void create_node(SpockNode *node);
extern void drop_node(Oid nodeid);

extern Oid spock_node_id_for_name(const char *name);

extern SpockNode *get_node(Oid nodeid, bool missing_ok);
extern SpockNode *get_node_by_name(const char *name, bool missing_ok);

Expand Down
183 changes: 183 additions & 0 deletions include/spock_seqam.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*-------------------------------------------------------------------------
*
* spock_seqam.h
* Distributed sequence access methods for Spock.
*
* Copyright (c) 2022-2026, pgEdge, Inc.
*
*-------------------------------------------------------------------------
*/
#ifndef SPOCK_SEQAM_H
#define SPOCK_SEQAM_H

#include "postgres.h"
#include "catalog/pg_sequence.h"
#include "commands/sequence.h" /* SeqTable, nextval_hook_type */
#include "utils/relcache.h"

/*
* Catalog name and column positions for spock.sequence_kind.
*
* Keep these in sync with sql/spock--*.sql. The table is keyed on
* (nspname, relname); seqoid is a non-key cache column.
*/
#define CATALOG_SEQUENCE_KIND "sequence_kind"
#define CATALOG_SEQUENCE_KIND_OID_IDX "spock_sequence_kind_seqoid_idx"
#define Natts_sequence_kind 4
#define Anum_sequence_kind_nspname 1
#define Anum_sequence_kind_relname 2
#define Anum_sequence_kind_kind 3
#define Anum_sequence_kind_seqoid 4

/*
* Hook entry for ALTER SEQUENCE ... RENAME and SET SCHEMA. Called from
* src/spock_executor.c:spock_object_access on OAT_POST_ALTER for a
* RELKIND_SEQUENCE relation (subId == 0). Resolves the row by seqoid via
* the secondary index and updates (nspname, relname) if the live sequence
* has been renamed or moved.
*/
extern void spock_seqam_relocate_sequence_record(Oid seqoid);

/*
* Registered methods. Identifiers are stable across versions and serve as
* the on-disk encoding of the `kind` column when we eventually move it from
* text to a more compact form.
*
* SPOCK_SEQAM_NMETHODS is a sentinel: it must remain one past the last real
* kind so the method-table array sizes correctly when a new kind is added.
*/
typedef enum SpockSeqAmKind
{
SPOCK_SEQAM_LOCAL = 0, /* fall through to in-core nextval */
SPOCK_SEQAM_SNOWFLAKE = 1,
SPOCK_SEQAM_NMETHODS /* leave last */
} SpockSeqAmKind;

/*
* Per-method dispatch table. Filled in by spock_seqam_register_methods()
* at extension load and never mutated after that.
*/
typedef struct SpockSeqAmMethod
{
const char *name; /* "snowflake", "local", ... */
SpockSeqAmKind kind;

/*
* Hot-path nextval. Signature mirrors the SeqAM `nextval` callback
* (the in-flight Paquier patch's SequenceAmRoutine.nextval): receives
* the open sequence relation, the unpacked catalog options, and an
* IN/OUT *last for prefetch reporting. Returns the int64 value to be
* used as nextval()'s result.
*
* Methods that do not prefetch (Snowflake) must set *last to the
* returned value so the in-core CACHE fast path stays neutral.
*/
int64 (*nextval) (Relation rel,
int64 incby, int64 maxv, int64 minv,
int64 cache, bool cycle,
int64 *last);
} SpockSeqAmMethod;

/*
* GUC-controlled defaults. Defined in spock_seqam.c.
*/
extern int spock_seqam_default_kind; /* enum SpockSeqAmKind */

/*
* Snowflake layout constants.
*
* Bit allocation is fixed. PGD chose the same split for SnowflakeId and the
* mathematics of (4096 values / node / ms) are sufficient for any OLTP
* workload; making it configurable is a footgun.
*/
#define SPOCK_SNOWFLAKE_TIMESTAMP_BITS 41
#define SPOCK_SNOWFLAKE_NODE_BITS 10
#define SPOCK_SNOWFLAKE_COUNTER_BITS 12

#define SPOCK_SNOWFLAKE_NODE_SHIFT SPOCK_SNOWFLAKE_COUNTER_BITS
#define SPOCK_SNOWFLAKE_TIMESTAMP_SHIFT (SPOCK_SNOWFLAKE_NODE_BITS + SPOCK_SNOWFLAKE_COUNTER_BITS)

#define SPOCK_SNOWFLAKE_COUNTER_MASK ((UINT64CONST(1) << SPOCK_SNOWFLAKE_COUNTER_BITS) - 1)
#define SPOCK_SNOWFLAKE_NODE_MASK (((UINT64CONST(1) << SPOCK_SNOWFLAKE_NODE_BITS) - 1) \
<< SPOCK_SNOWFLAKE_NODE_SHIFT)

#define SPOCK_SNOWFLAKE_MAX_NODE_ID ((1 << SPOCK_SNOWFLAKE_NODE_BITS) - 1)

/*
* 2023-01-01 00:00:00 UTC as Unix milliseconds. The 41-bit ms timestamp
* field thus runs out around 2092. Chosen to match the standalone
* `snowflake` extension's epoch so values from both extensions decode
* identically (same bit layout, same epoch, same node-id derivation).
*
* Do NOT change this constant after a cluster has generated any
* snowflake values: every previously emitted value would appear to be
* from a different era and could compare incorrectly with newly
* generated ones.
*/
#define SPOCK_SNOWFLAKE_EPOCH_MS INT64CONST(1672531200000)

/*
* Magic word stamped in the special area of a sequence's heap page by
* the in-core sequence machinery. Hardcoded -- core declares the type
* privately in src/backend/commands/sequence.c. Validate on every read
* to catch page-layout drift across PG major versions at buildfarm-
* assert level.
*/
#define SPOCK_SEQUENCE_PAGE_MAGIC 0x1717

typedef struct SpockSequencePageMagic
{
uint32 magic;
} SpockSequencePageMagic;

/*
* Pre-log window for WAL batching, in milliseconds. On every nextval we
* compare the emitted value against last_logged_threshold (stored in
* seq->log_cnt). When we cross the threshold, we WAL-log a reservation
* for the next SPOCK_SNOWFLAKE_LOG_INTERVAL_MS of generation. Crash
* recovery jumps last_value forward by at most this interval -- never
* produces duplicates. Same mechanism core PG uses for stock sequence
* cache, expressed in time instead of count.
*/
#define SPOCK_SNOWFLAKE_LOG_INTERVAL_MS 30

/*
* Module entry point. Called from _PG_init() to register the nextval hook,
* install GUCs and the relcache invalidation callback.
*/
extern void spock_seqam_init(void);

/*
* Drop hook callback. Invoked from src/spock_executor.c:spock_object_access
* on OAT_DROP of a RELKIND_SEQUENCE relation. Deletes the spock.sequence_kind
* row if any.
*/
extern void spock_seqam_drop_sequence_record(Oid seqoid);

/*
* Lookup the catalog kind for a sequence by (nspname, relname). Returns
* false if the catalog is missing (extension not yet installed) or if no
* row exists for the sequence. Used by replication paths to skip
* managed-sequence values that must generate independently per node.
*/
extern bool spock_seqam_lookup_kind_by_name(const char *nspname,
const char *relname,
SpockSeqAmKind *kind_out);

/*
* SQL-callable, registered from sql/spock--*.sql.
*/
extern Datum spock_alter_sequence_set_kind(PG_FUNCTION_ARGS);
extern Datum spock_convert_all_sequences(PG_FUNCTION_ARGS);
extern Datum spock_sequence_hook_available(PG_FUNCTION_ARGS);

/*
* Forward declaration of the snowflake method. Defined in
* src/spock_seqam_snowflake.c.
*/
extern int64 spock_seqam_snowflake_nextval(Relation rel,
int64 incby, int64 maxv, int64 minv,
int64 cache, bool cycle,
int64 *last);

#endif /* SPOCK_SEQAM_H */
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ nav:
- Using Spock in Read-Only Mode: managing/read_only.md
- Using a Trigger to Manage Replication Set Membership: managing/repset_trigger.md
- Using Snowflake Sequences: managing/snowflake.md
- Distributed Sequences (Snowflake via nextval hook): distributed_sequences.md
- Using Lolor to Manage Large Objects: managing/lolor.md
- Using Automatic DDL Replication: managing/spock_autoddl.md
- Adding or Removing Nodes:
Expand Down
172 changes: 172 additions & 0 deletions patches/15/pg15-050-nextval-hook.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index fbcfcddb59e..b2430ea44b8 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -95,6 +95,9 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */
*/
static SeqTableData *last_used_seq = NULL;

+/* Spock: nextval generator hook; see sequence.h for contract. */
+nextval_hook_type nextval_hook = NULL;
+
static void fill_seq_with_data(Relation rel, HeapTuple tuple);
static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
static Relation lock_and_open_sequence(SeqTable seq);
@@ -637,24 +640,15 @@ nextval_internal(Oid relid, bool check_permissions)
{
SeqTable elm;
Relation seqrel;
- Buffer buf;
- Page page;
HeapTuple pgstuple;
Form_pg_sequence pgsform;
- HeapTupleData seqdatatuple;
- Form_pg_sequence_data seq;
int64 incby,
maxv,
minv,
cache,
- log,
- fetch,
last;
- int64 result,
- next,
- rescnt = 0;
+ int64 result;
bool cycle;
- bool logit = false;

/* open and lock sequence */
init_sequence(relid, &elm, &seqrel);
@@ -699,12 +693,51 @@ nextval_internal(Oid relid, bool check_permissions)
cycle = pgsform->seqcycle;
ReleaseSysCache(pgstuple);

+ /* Spock: hook overrides; swap for rel->rd_sequenceam->nextval at SeqAM merge. */
+ if (nextval_hook != NULL)
+ result = (*nextval_hook) (seqrel, incby, maxv, minv, cache, cycle, &last);
+ else
+ result = sequence_am_local_nextval(seqrel, incby, maxv, minv,
+ cache, cycle, &last);
Comment on lines +25 to +50
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Initialize last defensively before dispatching to the hook.

last is declared (line 31) but never initialized before &last is passed to nextval_hook at line 47. The local-case helper writes *last unconditionally on entry, but a third-party hook that returns without writing *last (e.g. an early-return error path that returns a sentinel) will cause elm->cached = last; a few lines below to read uninitialized stack memory and poison the in-core CACHE fast path for the remainder of the session. The header comment documents the contract ("Methods that do not prefetch ... must set *last to the returned value") but the cost of a defensive init is one line.

🛡️ Suggested defensive init
 	int64		incby,
 				maxv,
 				minv,
 				cache,
-				last;
+				last = 0;
 	int64		result;
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@patches/15/pg15-050-nextval-hook.diff` around lines 25 - 50, In
nextval_internal, initialize the local variable last defensively before passing
&last to nextval_hook or sequence_am_local_nextval to avoid leaving it
uninitialized if a hook returns without setting *last; locate the
nextval_internal function and add a single initialization of last (e.g., set to
the returned value sentinel or the incoming last value semantics) immediately
before the block that calls nextval_hook/sequence_am_local_nextval so that
subsequent use (elm->cached = last) cannot read uninitialized memory.

+
+ /* save info in local cache */
+ elm->increment = incby;
+ elm->last = result; /* last returned number */
+ elm->cached = last; /* last fetched number */
+ elm->last_valid = true;
+
+ relation_close(seqrel, NoLock);
+ last_used_seq = elm;
+ return result;
+}
+
+/*
+ * Spock: stock per-call generator, factored out of nextval_internal so the
+ * nextval_hook can delegate to it. Caller owns seqrel and the per-session
+ * cache state; *last receives the largest value reserved for the session.
+ */
+int64
+sequence_am_local_nextval(Relation seqrel,
+ int64 incby, int64 maxv, int64 minv,
+ int64 cache, bool cycle,
+ int64 *last)
+{
+ Buffer buf;
+ Page page;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence_data seq;
+ int64 log,
+ fetch;
+ int64 result,
+ next,
+ rescnt = 0;
+ bool logit = false;
+
/* lock page' buffer and read tuple */
seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
page = BufferGetPage(buf);

- elm->increment = incby;
- last = next = result = seq->last_value;
+ *last = next = result = seq->last_value;
fetch = cache;
log = seq->log_cnt;

@@ -791,7 +824,7 @@ nextval_internal(Oid relid, bool check_permissions)
{
log--;
rescnt++;
- last = next;
+ *last = next;
if (rescnt == 1) /* if it's first result - */
result = next; /* it's what to return */
}
@@ -800,13 +833,6 @@ nextval_internal(Oid relid, bool check_permissions)
log -= fetch; /* adjust for any unfetched numbers */
Assert(log >= 0);

- /* save info in local cache */
- elm->last = result; /* last returned number */
- elm->cached = last; /* last fetched number */
- elm->last_valid = true;
-
- last_used_seq = elm;
-
/*
* If something needs to be WAL logged, acquire an xid, so this
* transaction's commit will trigger a WAL flush and wait for syncrep.
@@ -862,7 +888,7 @@ nextval_internal(Oid relid, bool check_permissions)
}

/* Now update sequence tuple to the intended final state */
- seq->last_value = last; /* last fetched number */
+ seq->last_value = *last; /* last fetched number */
seq->is_called = true;
seq->log_cnt = log; /* how much is logged */

@@ -870,8 +896,6 @@ nextval_internal(Oid relid, bool check_permissions)

UnlockReleaseBuffer(buf);

- relation_close(seqrel, NoLock);
-
return result;
}

diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h
index 9da23008101..0fe86199306 100644
--- a/src/include/commands/sequence.h
+++ b/src/include/commands/sequence.h
@@ -20,6 +20,7 @@
#include "nodes/parsenodes.h"
#include "parser/parse_node.h"
#include "storage/relfilenode.h"
+#include "utils/relcache.h"


typedef struct FormData_pg_sequence_data
@@ -55,6 +56,24 @@ extern int64 nextval_internal(Oid relid, bool check_permissions);
extern Datum nextval(PG_FUNCTION_ARGS);
extern List *sequence_options(Oid relid);

+/*
+ * Spock: per-call value-generation hook. Signature matches Paquier's
+ * upstream SeqAM nextval callback; the forward-port replaces the call
+ * site with rel->rd_sequenceam->nextval(...). Single consumer; extension
+ * must reject install when nextval_hook != NULL.
+ */
+typedef int64 (*nextval_hook_type) (Relation rel,
+ int64 incby, int64 maxv, int64 minv,
+ int64 cache, bool cycle,
+ int64 *last);
+extern PGDLLIMPORT nextval_hook_type nextval_hook;
+
+/* Spock: stock per-call generator, factored out so hooks can delegate. */
+extern int64 sequence_am_local_nextval(Relation rel,
+ int64 incby, int64 maxv, int64 minv,
+ int64 cache, bool cycle,
+ int64 *last);
+
extern ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *stmt);
extern ObjectAddress AlterSequence(ParseState *pstate, AlterSeqStmt *stmt);
extern void SequenceChangePersistence(Oid relid, char newrelpersistence);
Loading
Loading