Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1fb376b
Revert "SOLR-17153: CloudSolrClient should not throw "Collection not …
gerlowskija Apr 5, 2024
7f3e980
Allow embedded-ZK to run in quorum/ensemble mode
gerlowskija Apr 5, 2024
459ecf6
Revert "Revert "SOLR-17153: CloudSolrClient should not throw "Collect…
gerlowskija Sep 14, 2025
de59401
Merge branch 'main' into spike-zk-quorum
gerlowskija Sep 14, 2025
ff0a9ca
Merge remote-tracking branch 'upstream/main' into pr/2391
epugh Oct 13, 2025
cd6ecc5
Take advantage of Solr node roles to determine when to start embedded zk
epugh Oct 14, 2025
9e6ef68
Strip out old log4j workaround not needed, look at ide warnings.
epugh Oct 14, 2025
479e85f
check in some work to be removed
epugh Oct 14, 2025
0601701
Merge branch 'refs/heads/main' into spike-zk-quorum
janhoy Oct 15, 2025
27ea8e9
Properly clean up ZK server resources
janhoy Oct 15, 2025
6bd62ec
Fix precommit in ZkContainer
janhoy Oct 16, 2025
8ee628d
New test TestEmbeddedZkQuorum
janhoy Oct 16, 2025
d8bac96
Handle standalone case in ZkContainer.initZookeeper
janhoy Oct 16, 2025
c5ee205
Spent too much time on this, backing it out.
epugh Oct 16, 2025
991ba1e
Merge remote-tracking branch 'upstream/main' into spike-zk-quorum
epugh Nov 1, 2025
47e928b
add change log
epugh Nov 1, 2025
7cd745d
Redo explanation to be clearer
epugh Nov 1, 2025
42d5213
update variable name
epugh Nov 1, 2025
f8d4f8a
remove unneed variable and if statement, and add a reminder
epugh Nov 1, 2025
bacb4af
remove the /solr/initialized zk node, it appears to be a multi thread…
epugh Nov 1, 2025
5199b84
Remove intermediate test class and simplify cluster set up
epugh Nov 1, 2025
3f785da
Better nesting of zkServerEnabled check and if in quorum mode...
epugh Nov 1, 2025
367d37b
zkEnabled does actually do anything!
epugh Nov 1, 2025
898897a
Merge branch 'main' into spike-zk-quorum
janhoy Jan 28, 2026
ac9bf9e
Update code to work with latest main
janhoy Jan 28, 2026
227f8cc
Safer port allocation in MiniSolrCloudCluster
janhoy Jan 28, 2026
92b8420
Two new tests for resilience
janhoy Jan 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog/unreleased/spike-zk-quorum.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Add the capability for Solr to run embedded ZooKeeper in quorum/ensemble mode, allowing multiple Solr nodes to form a distributed ZooKeeper ensemble within their own processes
type: other # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Eric Pugh
- name: Jason Gerlowski
50 changes: 13 additions & 37 deletions solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,18 @@ public class SolrZkServer {

public static final String ZK_WHITELIST_PROPERTY = "zookeeper.4lw.commands.whitelist";

boolean zkRun = false;
String zkHost;

int solrPort;
Properties props;
SolrZkServerProps zkProps;

private Thread zkThread; // the thread running a zookeeper server, only if zkRun is true
private Thread zkThread; // the thread running a zookeeper server, only if zkServerEnabled is true

private Path dataHome; // o.a.zookeeper.**.QuorumPeerConfig needs a File not a Path
private String confHome;

public SolrZkServer(boolean zkRun, String zkHost, Path dataHome, String confHome, int solrPort) {
this.zkRun = zkRun;
public SolrZkServer(String zkHost, Path dataHome, String confHome, int solrPort) {
this.zkHost = zkHost;
this.dataHome = dataHome;
this.confHome = confHome;
Expand All @@ -71,11 +69,6 @@ public String getClientString() {
return null;
}

// if the string wasn't passed as zkHost, then use the standalone server we started
if (!zkRun) {
return null;
}

InetSocketAddress addr = zkProps.getClientPortAddress();
String hostName;
// We cannot advertise 0.0.0.0, so choose the best host to advertise
Expand All @@ -94,7 +87,6 @@ public void parseConfig() {
// set default data dir
// TODO: use something based on IP+port??? support ensemble all from same solr home?
zkProps.setDataDir(dataHome);
zkProps.zkRun = zkRun;
zkProps.solrPort = Integer.toString(solrPort);
}

Expand All @@ -113,7 +105,7 @@ public void parseConfig() {

try {
props = SolrZkServerProps.getProperties(zooCfgPath);
SolrZkServerProps.injectServers(props, zkRun, zkHost);
SolrZkServerProps.injectServers(props, zkHost);
// This is the address that the embedded Zookeeper will bind to. Like Solr, it defaults to
// "127.0.0.1".
props.setProperty(
Expand All @@ -123,9 +115,8 @@ public void parseConfig() {
}
zkProps.parseProperties(props);
} catch (QuorumPeerConfig.ConfigException | IOException e) {
if (zkRun) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}

throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}

Expand All @@ -134,9 +125,6 @@ public Map<Long, QuorumPeer.QuorumServer> getServers() {
}

public void start() {
if (!zkRun) {
return;
}

if (System.getProperty(ZK_WHITELIST_PROPERTY) == null) {
System.setProperty(ZK_WHITELIST_PROPERTY, "ruok, mntr, conf");
Expand All @@ -163,20 +151,11 @@ public void start() {
},
"embeddedZkServer");

if (zkProps.getServers().size() > 1) {
if (log.isInfoEnabled()) {
log.info(
"STARTING EMBEDDED ENSEMBLE ZOOKEEPER SERVER at port {}, listening on host {}",
zkProps.getClientPortAddress().getPort(),
zkProps.getClientPortAddress().getAddress().getHostAddress());
}
} else {
if (log.isInfoEnabled()) {
log.info(
"STARTING EMBEDDED ENSEMBLE ZOOKEEPER SERVER at port {}, listening on host {}",
zkProps.getClientPortAddress().getPort(),
zkProps.getClientPortAddress().getAddress().getHostAddress());
}
if (log.isInfoEnabled()) {
log.info(
"STARTING EMBEDDED ENSEMBLE ZOOKEEPER SERVER at port {}, listening on host {}",
zkProps.getClientPortAddress().getPort(),
zkProps.getClientPortAddress().getAddress().getHostAddress());
}

zkThread.setDaemon(true);
Expand All @@ -203,9 +182,7 @@ public void start() {
}

public void stop() {
if (!zkRun) {
return;
}

zkThread.interrupt();
}
}
Expand All @@ -216,7 +193,6 @@ class SolrZkServerProps extends QuorumPeerConfig {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

String solrPort; // port that Solr is listening on
boolean zkRun;

/**
* Parse a ZooKeeper configuration file
Expand Down Expand Up @@ -245,10 +221,10 @@ public static Properties getProperties(Path configPath) throws ConfigException {
// Given zkHost=localhost:1111,localhost:2222 this will inject
// server.0=localhost:1112:1113
// server.1=localhost:2223:2224
public static void injectServers(Properties props, boolean zkRun, String zkHost) {
public static void injectServers(Properties props, String zkHost) {

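// If clientPort is not already set, a default would be derived here.
// NOTE(review): zkRun is no longer a parameter, and the lookup below searches an empty
// string, so portIdx is always -1 and the inner branch never runs - confirm whether this
// fallback is still needed.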
if (zkRun && props.getProperty("clientPort") == null) {
if (props.getProperty("clientPort") == null) {
// int portIdx = zkRun.lastIndexOf(':');
int portIdx = "".lastIndexOf(':');
if (portIdx > 0) {
Expand Down
2 changes: 1 addition & 1 deletion solr/core/src/java/org/apache/solr/core/CoreContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ public JerseyAppHandlerCache getJerseyAppHandlerCache() {

private final ObjectCache objectCache = new ObjectCache();

public final NodeRoles nodeRoles = new NodeRoles(System.getProperty(NodeRoles.NODE_ROLES_PROP));
public final NodeRoles nodeRoles = new NodeRoles(EnvUtils.getProperty(NodeRoles.NODE_ROLES_PROP));

private final ExecutorService indexSearcherExecutor;

Expand Down
13 changes: 9 additions & 4 deletions solr/core/src/java/org/apache/solr/core/NodeConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,16 @@ public static NodeConfig loadNodeConfig(Path solrHome, Properties nodeProperties
initModules(loader, null);
nodeProperties = SolrXmlConfig.wrapAndSetZkHostFromSysPropIfNeeded(nodeProperties);

// TODO: Only job of this block is to
// delay starting a solr core to satisfy
// ZkFailoverTest test case...
String zkHost = nodeProperties.getProperty(SolrXmlConfig.ZK_HOST);
if (StrUtils.isNotNullOrEmpty(zkHost)) {
NodeRoles nodeRoles = new NodeRoles(EnvUtils.getProperty(NodeRoles.NODE_ROLES_PROP));
boolean zookeeperQuorumNode =
NodeRoles.MODE_ON.equals(nodeRoles.getRoleMode(NodeRoles.Role.ZOOKEEPER_QUORUM));

// This block pauses and waits for ZooKeeper to become available before continuing; nodes
// whose zookeeper_quorum role is on skip the wait.
// See ZkFailoverTest for how changing solr.cloud.wait.for.zk.seconds affects this
// behavior.
if (StrUtils.isNotNullOrEmpty(zkHost) && !zookeeperQuorumNode) {
int startUpZkTimeOut =
1000
* Integer.getInteger(
Expand Down
12 changes: 12 additions & 0 deletions solr/core/src/java/org/apache/solr/core/NodeRoles.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ public String modeWhenRoleIsAbsent() {
public Set<String> supportedModes() {
return Set.of(MODE_ON, MODE_OFF);
}
},

/**
 * Node role registered under the name {@code zookeeper_quorum}.
 *
 * <p>NOTE(review): presumably marks nodes that take part in the embedded ZooKeeper quorum;
 * confirm against the code that consumes this role.
 */
ZOOKEEPER_QUORUM("zookeeper_quorum") {
/** Returns the modes accepted for this role: {@code MODE_ON} and {@code MODE_OFF}. */
@Override
public Set<String> supportedModes() {
return Set.of(MODE_ON, MODE_OFF);
}

/** Returns {@code MODE_OFF} as the mode to use when this role is not specified. */
@Override
public String modeWhenRoleIsAbsent() {
return MODE_OFF;
}
};

public final String roleName;
Expand Down
Loading
Loading