Use of org.apache.solr.update.SolrCmdDistributor.Error in project lucene-solr by apache.
From the class SolrCmdDistributorTest, method test():
@Test
@ShardsFixed(num = 4)
public void test() throws Exception {
del("*:*");
SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(updateShardHandler);
ModifiableSolrParams params = new ModifiableSolrParams();
List<Node> nodes = new ArrayList<>();
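// each target node is described by its base URL (with an empty core name) and wrapped in a StdNode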
ZkNodeProps nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, ((HttpSolrClient) controlClient).getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
// add one doc to controlClient
AddUpdateCommand cmd = new AddUpdateCommand(null);
cmd.solrDoc = sdoc("id", id.incrementAndGet());
params = new ModifiableSolrParams();
cmdDistrib.distribAdd(cmd, nodes, params);
CommitUpdateCommand ccmd = new CommitUpdateCommand(null, false);
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribCommit(ccmd, nodes, params);
cmdDistrib.finish();
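// finish() drains all pending requests; any per-node failures are then available via getErrors()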
List<Error> errors = cmdDistrib.getErrors();
assertEquals(errors.toString(), 0, errors.size());
long numFound = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
assertEquals(1, numFound);
HttpSolrClient client = (HttpSolrClient) clients.get(0);
nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, client.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
// add another 2 docs to control and 3 to client
cmdDistrib = new SolrCmdDistributor(updateShardHandler);
cmd.solrDoc = sdoc("id", id.incrementAndGet());
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribAdd(cmd, nodes, params);
int id2 = id.incrementAndGet();
AddUpdateCommand cmd2 = new AddUpdateCommand(null);
cmd2.solrDoc = sdoc("id", id2);
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribAdd(cmd2, nodes, params);
AddUpdateCommand cmd3 = new AddUpdateCommand(null);
cmd3.solrDoc = sdoc("id", id.incrementAndGet());
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribAdd(cmd3, Collections.singletonList(nodes.get(1)), params);
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribCommit(ccmd, nodes, params);
cmdDistrib.finish();
errors = cmdDistrib.getErrors();
assertEquals(errors.toString(), 0, errors.size());
SolrDocumentList results = controlClient.query(new SolrQuery("*:*")).getResults();
numFound = results.getNumFound();
assertEquals(results.toString(), 3, numFound);
numFound = client.query(new SolrQuery("*:*")).getResults().getNumFound();
assertEquals(3, numFound);
// now delete doc 2 which is on both control and client1
DeleteUpdateCommand dcmd = new DeleteUpdateCommand(null);
dcmd.id = Integer.toString(id2);
cmdDistrib = new SolrCmdDistributor(updateShardHandler);
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribDelete(dcmd, nodes, params);
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribCommit(ccmd, nodes, params);
cmdDistrib.finish();
errors = cmdDistrib.getErrors();
assertEquals(errors.toString(), 0, errors.size());
results = controlClient.query(new SolrQuery("*:*")).getResults();
numFound = results.getNumFound();
assertEquals(results.toString(), 2, numFound);
numFound = client.query(new SolrQuery("*:*")).getResults().getNumFound();
assertEquals(2, numFound);
for (SolrClient c : clients) {
c.optimize();
//System.out.println(clients.get(0).request(new LukeRequest()));
}
cmdDistrib = new SolrCmdDistributor(updateShardHandler);
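// stress the distributor: queue a few hundred adds, each to a random subset of the clients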
int cnt = atLeast(303);
for (int i = 0; i < cnt; i++) {
nodes.clear();
for (SolrClient c : clients) {
if (random().nextBoolean()) {
continue;
}
HttpSolrClient httpClient = (HttpSolrClient) c;
nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
}
AddUpdateCommand c = new AddUpdateCommand(null);
c.solrDoc = sdoc("id", id.incrementAndGet());
if (nodes.size() > 0) {
params = new ModifiableSolrParams();
cmdDistrib.distribAdd(c, nodes, params);
}
}
nodes.clear();
for (SolrClient c : clients) {
HttpSolrClient httpClient = (HttpSolrClient) c;
nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
}
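// count postCommit callbacks across every core to verify the commit reaches all shards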
final AtomicInteger commits = new AtomicInteger();
for (JettySolrRunner jetty : jettys) {
CoreContainer cores = jetty.getCoreContainer();
try (SolrCore core = cores.getCore("collection1")) {
core.getUpdateHandler().registerCommitCallback(new SolrEventListener() {
@Override
public void init(NamedList args) {
}
@Override
public void postSoftCommit() {
}
@Override
public void postCommit() {
commits.incrementAndGet();
}
@Override
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
}
});
}
}
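// a single distributed commit should fire exactly one postCommit per shard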
params = new ModifiableSolrParams();
params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
cmdDistrib.distribCommit(ccmd, nodes, params);
cmdDistrib.finish();
assertEquals(getShardCount(), commits.get());
for (SolrClient c : clients) {
NamedList<Object> resp = c.request(new LukeRequest());
assertEquals("SOLR-3428: We only did adds - there should be no deletes", ((NamedList<Object>) resp.get("index")).get("numDocs"), ((NamedList<Object>) resp.get("index")).get("maxDoc"));
}
testMaxRetries();
testOneRetry();
testRetryNodeAgainstBadAddress();
testRetryNodeWontRetrySocketError();
testDistribOpenSearcher();
}
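Distilled from the test above, the distributor lifecycle is: construct, queue adds and a commit, call finish(), then inspect getErrors(). The following is a minimal sketch rather than project code; it assumes the same scaffolding as the test (updateShardHandler, the sdoc helper, a log field), and the base URL is a placeholder.
SolrCmdDistributor dist = new SolrCmdDistributor(updateShardHandler);
List<Node> targets = new ArrayList<>();
// placeholder URL; in the test this comes from HttpSolrClient.getBaseURL()
ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1:8983/solr", ZkStateReader.CORE_NAME_PROP, "");
targets.add(new StdNode(new ZkCoreNodeProps(props)));
AddUpdateCommand add = new AddUpdateCommand(null);
add.solrDoc = sdoc("id", "1000");
dist.distribAdd(add, targets, new ModifiableSolrParams());
CommitUpdateCommand commit = new CommitUpdateCommand(null, false);
dist.distribCommit(commit, targets, new ModifiableSolrParams());
// finish() blocks until every queued request has completed
dist.finish();
for (SolrCmdDistributor.Error error : dist.getErrors()) {
// each Error pairs the failed request (error.req) with its exception (error.e)
log.warn("update to " + error.req.node.getUrl() + " failed", error.e);
}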
Use of org.apache.solr.update.SolrCmdDistributor.Error in project lucene-solr by apache.
From the class DistributedUpdateProcessor, method doFinish():
// TODO: optionally fail if n replicas are not reached...
private void doFinish() {
// TODO: if not a forward and replication req is not specified, we could
// send in a background thread
cmdDistrib.finish();
List<Error> errors = cmdDistrib.getErrors();
// TODO - we may need to tell about more than one error...
List<Error> errorsForClient = new ArrayList<>(errors.size());
for (final SolrCmdDistributor.Error error : errors) {
if (error.req.node instanceof RetryNode) {
// if it's a forward, any fail is a problem -
// otherwise we assume things are fine if we got it locally
// until we start allowing min replication param
errorsForClient.add(error);
continue;
}
// else, for now, we don't error - we assume if it was added locally, we succeeded
if (log.isWarnEnabled()) {
log.warn("Error sending update to " + error.req.node.getBaseUrl(), error.e);
}
// Since it is not a forward request, for each fail, try to tell them to
// recover - the doc was already added locally, so it should have been
// legit
DistribPhase phase = DistribPhase.parseParam(error.req.uReq.getParams().get(DISTRIB_UPDATE_PARAM));
if (phase != DistribPhase.FROMLEADER) {
// don't have non-leaders try to recover other nodes
continue;
}
// we don't want to run recovery on a node which missed a commit command
if (error.req.uReq.getParams().get(COMMIT_END_POINT) != null) {
continue;
}
final String replicaUrl = error.req.node.getUrl();
// if the remote replica failed the request because of leader change (SOLR-6511), then fail the request
String cause = (error.e instanceof SolrException) ? ((SolrException) error.e).getMetadata("cause") : null;
if ("LeaderChanged".equals(cause)) {
// let's just fail this request and let the client retry? or just call processAdd again?
log.error("On " + cloudDesc.getCoreNodeName() + ", replica " + replicaUrl + " now thinks it is the leader! Failing the request to let the client retry! " + error.e);
errorsForClient.add(error);
continue;
}
String collection = null;
String shardId = null;
if (error.req.node instanceof StdNode) {
StdNode stdNode = (StdNode) error.req.node;
collection = stdNode.getCollection();
shardId = stdNode.getShardId();
// before we go setting other replicas to down, make sure we're still the leader!
String leaderCoreNodeName = null;
Exception getLeaderExc = null;
Replica leaderProps = null;
try {
leaderProps = zkController.getZkStateReader().getLeader(collection, shardId);
if (leaderProps != null) {
leaderCoreNodeName = leaderProps.getName();
}
} catch (Exception exc) {
getLeaderExc = exc;
}
if (leaderCoreNodeName == null) {
log.warn("Failed to determine if {} is still the leader for collection={} shardId={} " + "before putting {} into leader-initiated recovery", cloudDesc.getCoreNodeName(), collection, shardId, replicaUrl, getLeaderExc);
}
List<ZkCoreNodeProps> myReplicas = zkController.getZkStateReader().getReplicaProps(collection, cloudDesc.getShardId(), cloudDesc.getCoreNodeName());
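// confirm the error'd node is actually one of this leader's replicas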
boolean foundErrorNodeInReplicaList = false;
if (myReplicas != null) {
for (ZkCoreNodeProps replicaProp : myReplicas) {
if (((Replica) replicaProp.getNodeProps()).getName().equals(((Replica) stdNode.getNodeProps().getNodeProps()).getName())) {
foundErrorNodeInReplicaList = true;
break;
}
}
}
// If the client specified minRf and we didn't achieve the minRf, don't send recovery and let client retry
if (replicationTracker != null && replicationTracker.getAchievedRf() < replicationTracker.minRf) {
continue;
}
// proceed only if we are still the same leader, the error'd node is one of our
// replicas, and the error'd node is not ourself (we do not want to put ourself into LIR)
if (leaderCoreNodeName != null && cloudDesc.getCoreNodeName().equals(leaderCoreNodeName) && foundErrorNodeInReplicaList && !stdNode.getNodeProps().getCoreUrl().equals(leaderProps.getCoreUrl())) {
try {
// if false, then the node is probably not "live" anymore
// and we do not need to send a recovery message
Throwable rootCause = SolrException.getRootCause(error.e);
log.error("Setting up to try to start recovery on replica {}", replicaUrl, rootCause);
zkController.ensureReplicaInLeaderInitiatedRecovery(req.getCore().getCoreContainer(), collection, shardId, stdNode.getNodeProps(), req.getCore().getCoreDescriptor(), false);
} catch (Exception exc) {
Throwable setLirZnodeFailedCause = SolrException.getRootCause(exc);
log.error("Leader failed to set replica " + error.req.node.getUrl() + " state to DOWN due to: " + setLirZnodeFailedCause, setLirZnodeFailedCause);
}
} else {
// maybe we are no longer the leader, or the error'd node is not one of our replicas
if (!foundErrorNodeInReplicaList) {
log.warn("Core " + cloudDesc.getCoreNodeName() + " belonging to " + collection + " " + shardId + ", does not have error'd node " + stdNode.getNodeProps().getCoreUrl() + " as a replica. " + "No request recovery command will be sent!");
} else {
log.warn("Core " + cloudDesc.getCoreNodeName() + " is no longer the leader for " + collection + " " + shardId + " or we tried to put ourself into LIR, no request recovery command will be sent!");
}
}
}
}
if (replicationTracker != null) {
rsp.getResponseHeader().add(UpdateRequest.REPFACT, replicationTracker.getAchievedRf());
rsp.getResponseHeader().add(UpdateRequest.MIN_REPFACT, replicationTracker.minRf);
replicationTracker = null;
}
if (0 < errorsForClient.size()) {
throw new DistributedUpdatesAsyncException(errorsForClient);
}
}
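The branching above reduces to a simple triage rule: a failure on a RetryNode (a forwarded request) must be surfaced to the client, while a failure on an ordinary replica node is only a recovery candidate, because the update already succeeded locally on the leader. A minimal sketch of that rule as a hypothetical helper (not part of Solr), using only the Error fields exercised above (req and e):
private static void partitionErrors(List<SolrCmdDistributor.Error> errors, List<SolrCmdDistributor.Error> errorsForClient, List<SolrCmdDistributor.Error> recoveryCandidates) {
for (SolrCmdDistributor.Error error : errors) {
if (error.req.node instanceof RetryNode) {
// the forward itself failed, so the client cannot assume the update landed anywhere
errorsForClient.add(error);
} else {
// the leader applied the update locally; ask the failed replica to recover instead
recoveryCandidates.add(error);
}
}
}
In doFinish() the recovery path is further guarded by the leader re-check and replica-membership checks shown above.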