Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
ab05bdd
Start with test
HoustonPutman Jul 21, 2025
8328c78
Solve issue, but need to clean up. Need to fix for Restore as well
HoustonPutman Jul 21, 2025
d1d6bf5
Use new shardrequest constructs for SyncStrategy
HoustonPutman Jul 21, 2025
f5ee234
Response status should now be the number of replicas
HoustonPutman Jul 21, 2025
02d1e90
Cleanup unused parts of tests
HoustonPutman Jul 22, 2025
1be4ed0
Huge commit - restore uses installshard - big update in locking
HoustonPutman Jul 24, 2025
8365a23
Implement callingLock mirroring for distributed API Manager locking
HoustonPutman Aug 1, 2025
00a67b4
Huge commit - restore uses installshard - big update in locking
HoustonPutman Jul 24, 2025
2d5e12b
Implement callingLock mirroring for distributed API Manager locking
HoustonPutman Aug 1, 2025
37cea68
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Sep 2, 2025
293f35d
Merge remote-tracking branch 'apache/main' into locking-update
HoustonPutman Dec 2, 2025
242235a
Changelog
HoustonPutman Dec 2, 2025
4a2c562
Merge remote-tracking branch 'apache/main' into fix-restore-partial-s…
HoustonPutman Dec 2, 2025
b4c3b7b
WIP - testing
HoustonPutman Dec 4, 2025
72ae4bf
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 5, 2026
da3e6cd
Merge remote-tracking branch 'apache/main' into fix-restore-partial-s…
HoustonPutman Jan 5, 2026
43d1091
Removing leadership starts a recovery, don't issue a second recovery
HoustonPutman Jan 6, 2026
e037654
Handle async success/failure better for collectionHandlingUtils
HoustonPutman Jan 6, 2026
8323e72
Small fixes for installShardCmd
HoustonPutman Jan 6, 2026
e9c09d6
Add tests, fix recovery contention
HoustonPutman Jan 8, 2026
5e4c1be
Remove uneeded testing code
HoustonPutman Jan 8, 2026
25e9ecd
Tidy
HoustonPutman Jan 8, 2026
b0452f4
Support distributed locks better
HoustonPutman Jan 8, 2026
72401ca
Start switching over to AdminCmdContext
HoustonPutman Jan 8, 2026
6dac593
Refactor how locking is passed through collections api commands.
HoustonPutman Jan 9, 2026
e368c48
Make some bug fixes
HoustonPutman Jan 10, 2026
97352f7
Some test 'fixes', need to rethink v2 tests still
HoustonPutman Jan 10, 2026
6f2d61a
Add real tests for DeleteAlias and DeleteNode v2 apis
HoustonPutman Jan 12, 2026
74176a4
Tidy
HoustonPutman Jan 12, 2026
df0888c
Fix broken tests
HoustonPutman Jan 13, 2026
3627d29
Add missing test utility class
HoustonPutman Jan 13, 2026
4a49aa9
Move over remaining APIs and tests
HoustonPutman Jan 13, 2026
021303a
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 13, 2026
e61279c
Merge remote-tracking branch 'apache/main' into solr-18011-locking-up…
HoustonPutman Jan 17, 2026
4c9a766
Fix changelog entry
HoustonPutman Jan 17, 2026
c7cfe19
Remove files that shouldn't be changed.
HoustonPutman Jan 17, 2026
08326cc
One more that shouldn't be changed
HoustonPutman Jan 17, 2026
eaddd46
Merge branch 'solr-18011-locking-update' into fix-restore-partial-suc…
HoustonPutman Jan 17, 2026
b90c283
Some more fixes
HoustonPutman Jan 17, 2026
b13d97e
SOLR-18080: Initiate Leader election for ShardTerms
HoustonPutman Jan 21, 2026
7522caa
Add changelog entry
HoustonPutman Jan 21, 2026
776c28b
Fix precommit issues
HoustonPutman Jan 21, 2026
f3e6ff8
Fixes for passing callingLockIds around
HoustonPutman Jan 22, 2026
a78ece5
Merge branch 'solr-18080-shard-term-induce-leader-election' into fix-…
HoustonPutman Jan 23, 2026
d2dad8e
Fixes for syncStrategy with empty indexes
HoustonPutman Jan 23, 2026
198f01b
Improve tests for recovery
HoustonPutman Jan 24, 2026
a667bb4
Make read only check better
HoustonPutman Jan 24, 2026
ed8c839
Improve logging for results
HoustonPutman Jan 24, 2026
3b898f9
Various fixes
HoustonPutman Jan 24, 2026
bb37120
Let some parts of solr fetch an index writer for a read-only core
HoustonPutman Jan 27, 2026
3135c5b
Tidy
HoustonPutman Jan 27, 2026
e3d1d26
Fix collection and shard term deletion
HoustonPutman Jan 27, 2026
40b62f6
Tidy
HoustonPutman Jan 27, 2026
da598a5
make another wait conditional on not readOnly
HoustonPutman Jan 27, 2026
1b71b72
Add changelog entry, remove others
HoustonPutman Jan 27, 2026
f6062da
Fix no uLog error case
HoustonPutman Jan 28, 2026
714a705
Fix test
HoustonPutman Jan 28, 2026
201d31d
InstallShardTest failure scenario should not just be s3
HoustonPutman Jan 28, 2026
71aae5b
Make some fixes, primarily indexFetcher checking files that are still…
HoustonPutman Jan 28, 2026
062ef49
Address some review comments
HoustonPutman Jan 28, 2026
3f200ac
Change flag to failOnReadOnly
HoustonPutman Jan 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
title: Fix error scenario in InstallShardData and Restore
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
authors:
- name: Houston Putman
nick: HoustonPutman
links:
- name: SOLR-17821
url: https://issues.apache.org/jira/browse/SOLR-17821
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,9 @@ public class InstallShardDataRequestBody {

@JsonProperty public String repository;

@JsonProperty public String name;

@JsonProperty public String shardBackupId;

@JsonProperty public String async;
}
6 changes: 6 additions & 0 deletions solr/core/src/java/org/apache/solr/api/V2HttpCall.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.solr.api;

import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.CALLING_LOCK_IDS_HEADER;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.ADMIN_OR_REMOTEPROXY;
import static org.apache.solr.servlet.SolrDispatchFilter.Action.PROCESS;
Expand Down Expand Up @@ -218,6 +219,11 @@ private void initAdminRequest(String path) throws Exception {
solrReq.getContext().put(CoreContainer.class.getName(), cores);
requestType = AuthorizationContext.RequestType.ADMIN;
action = ADMIN;

String callingLockIds = req.getHeader(CALLING_LOCK_IDS_HEADER);
if (callingLockIds != null && !callingLockIds.isBlank()) {
solrReq.getContext().put(CALLING_LOCK_IDS_HEADER, callingLockIds);
}
}

protected void parseRequest() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.solr.cloud;

import java.util.List;
import org.apache.solr.cloud.api.collections.CollectionApiLockFactory;
import org.apache.solr.common.params.CollectionParams;

Expand Down Expand Up @@ -62,5 +63,6 @@ DistributedLock createLock(
CollectionParams.LockLevel level,
String collName,
String shardId,
String replicaName);
String replicaName,
List<String> callingLockIds);
}
4 changes: 4 additions & 0 deletions solr/core/src/java/org/apache/solr/cloud/DistributedLock.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ public interface DistributedLock {
void release();

boolean isAcquired();

String getLockId();

boolean isMirroringLock();
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.annotations.VisibleForTesting;
import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -46,7 +47,12 @@ public void waitUntilAcquired() {
for (DistributedLock lock : locks) {
log.debug("DistributedMultiLock.waitUntilAcquired. About to wait on lock {}", lock);
lock.waitUntilAcquired();
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
if (lock.isMirroringLock()) {
log.debug(
"DistributedMultiLock.waitUntilAcquired. Mirroring already-acquired lock {}", lock);
} else {
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
}
}
}

Expand All @@ -70,6 +76,10 @@ public boolean isAcquired() {
return true;
}

public String getLockId() {
return locks.stream().map(DistributedLock::getLockId).collect(Collectors.joining(","));
}

@VisibleForTesting
public int getCountInternalLocks() {
return locks.size();
Expand Down
87 changes: 79 additions & 8 deletions solr/core/src/java/org/apache/solr/cloud/LockTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.solr.cloud.OverseerMessageHandler.Lock;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CollectionParams.LockLevel;
Expand All @@ -38,20 +39,35 @@ public class LockTree {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Node root = new Node(null, LockLevel.CLUSTER, null);

public final Map<String, Lock> allLocks = new HashMap<>();

private class LockImpl implements Lock {
final Node node;
final String id;

LockImpl(Node node) {
this.node = node;
this.id = UUID.randomUUID().toString();
}

@Override
public void unlock() {
synchronized (LockTree.this) {
node.unlock(this);
allLocks.remove(id);
}
}

@Override
public String id() {
return id;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return node.validateSubpath(lockLevel, path);
}

@Override
public String toString() {
return StrUtils.join(node.constructPath(new ArrayDeque<>()), '/');
Expand All @@ -71,12 +87,33 @@ public String toString() {
public class Session {
private SessionNode root = new SessionNode(LockLevel.CLUSTER);

public Lock lock(CollectionParams.CollectionAction action, List<String> path) {
public Lock lock(
CollectionParams.CollectionAction action, List<String> path, List<String> callingLockIds) {
if (action.lockLevel == LockLevel.NONE) return FREELOCK;
log.debug("Calling lock level: {}", callingLockIds);
Node startingNode = LockTree.this.root;
SessionNode startingSession = root;

// If a callingLockId was passed in, validate it with the current lock path, and only start
// locking below the calling lock
Lock callingLock = callingLockIds != null ? allLocks.get(callingLockIds.getLast()) : null;
boolean ignoreCallingLock = false;
if (callingLock != null && callingLock.validateSubpath(action.lockLevel.getHeight(), path)) {
startingNode = ((LockImpl) callingLock).node;
startingSession = startingSession.find(startingNode.level.getHeight(), path);
if (startingSession == null) {
startingSession = root;
}
ignoreCallingLock = true;
}
synchronized (LockTree.this) {
if (root.isBusy(action.lockLevel, path)) return null;
Lock lockObject = LockTree.this.root.lock(action.lockLevel, path);
if (lockObject == null) root.markBusy(action.lockLevel, path);
if (startingSession.isBusy(action.lockLevel, path)) return null;
Lock lockObject = startingNode.lock(action.lockLevel, path, ignoreCallingLock);
if (lockObject == null) {
startingSession.markBusy(action.lockLevel, path);
} else {
allLocks.put(lockObject.id(), lockObject);
}
return lockObject;
}
}
Expand Down Expand Up @@ -125,6 +162,18 @@ boolean isBusy(LockLevel lockLevel, List<String> path) {
return false;
}
}

SessionNode find(int lockLevel, List<String> path) {
if (level.getHeight() == lockLevel) {
return this;
} else if (level.getHeight() < lockLevel
&& kids != null
&& kids.containsKey(path.get(level.getHeight()))) {
return kids.get(path.get(level.getHeight())).find(lockLevel, path);
} else {
return null;
}
}
}

public Session getSession() {
Expand Down Expand Up @@ -158,8 +207,9 @@ void unlock(LockImpl lockObject) {
}
}

Lock lock(LockLevel lockLevel, List<String> path) {
if (myLock != null) return null; // I'm already locked. no need to go any further
Lock lock(LockLevel lockLevel, List<String> path, boolean ignoreCurrentLock) {
if (myLock != null && !ignoreCurrentLock)
return null; // I'm already locked. no need to go any further
if (lockLevel == level) {
// lock is supposed to be acquired at this level
// If I am locked or any of my children or grandchildren are locked
Expand All @@ -171,16 +221,37 @@ Lock lock(LockLevel lockLevel, List<String> path) {
Node child = children.get(childName);
if (child == null)
children.put(childName, child = new Node(childName, level.getChild(), this));
return child.lock(lockLevel, path);
return child.lock(lockLevel, path, false);
}
}

boolean validateSubpath(int lockLevel, List<String> path) {
return level.getHeight() < lockLevel
&& (level.getHeight() == 0 || name.equals(path.get(level.getHeight() - 1)))
&& (mom == null || mom.validateSubpath(lockLevel, path));
}

ArrayDeque<String> constructPath(ArrayDeque<String> collect) {
if (name != null) collect.addFirst(name);
if (mom != null) mom.constructPath(collect);
return collect;
}
}

static final Lock FREELOCK = () -> {};
static final String FREELOCK_ID = "-1";
static final Lock FREELOCK =
new Lock() {
@Override
public void unlock() {}

@Override
public String id() {
return FREELOCK_ID;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
Expand Down Expand Up @@ -61,7 +62,7 @@ public OverseerConfigSetMessageHandler(ZkStateReader zkStateReader, CoreContaine
}

@Override
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) {
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock) {
NamedList<Object> results = new NamedList<>();
try {
if (!operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
Expand Down Expand Up @@ -117,7 +118,22 @@ public Lock lockTask(ZkNodeProps message, long ignored) {
String configSetName = getTaskKey(message);
if (canExecute(configSetName, message)) {
markExclusiveTask(configSetName, message);
return () -> unmarkExclusiveTask(configSetName, message);
return new Lock() {
@Override
public void unlock() {
unmarkExclusiveTask(configSetName, message);
}

@Override
public String id() {
return configSetName;
}

@Override
public boolean validateSubpath(int lockLevel, List<String> path) {
return false;
}
};
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.solr.cloud;

import java.util.List;
import org.apache.solr.common.cloud.ZkNodeProps;

/** Interface for processing messages received by an {@link OverseerTaskProcessor} */
Expand All @@ -26,7 +27,7 @@ public interface OverseerMessageHandler {
* @param operation the operation to process
* @return response
*/
OverseerSolrResponse processMessage(ZkNodeProps message, String operation);
OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock);

/**
* @return the name of the OverseerMessageHandler
Expand All @@ -41,6 +42,10 @@ public interface OverseerMessageHandler {

interface Lock {
void unlock();

String id();

boolean validateSubpath(int lockLevel, List<String> path);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ public void run() {
if (log.isDebugEnabled()) {
log.debug("Runner processing {}", head.getId());
}
response = messageHandler.processMessage(message, operation);
response = messageHandler.processMessage(message, operation, lock);
} finally {
timerContext.stop();
updateStats(statsName);
Expand Down
Loading