use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.
the class TestCloudRecovery method leaderRecoverFromLogOnStartupTest.
@Test
public void leaderRecoverFromLogOnStartupTest() throws Exception {
AtomicInteger countReplayLog = new AtomicInteger(0);
DirectUpdateHandler2.commitOnClose = false;
UpdateLog.testing_logReplayFinishHook = countReplayLog::incrementAndGet;
CloudSolrClient cloudClient = cluster.getSolrClient();
cloudClient.add(COLLECTION, sdoc("id", "1"));
cloudClient.add(COLLECTION, sdoc("id", "2"));
cloudClient.add(COLLECTION, sdoc("id", "3"));
cloudClient.add(COLLECTION, sdoc("id", "4"));
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "*:*");
QueryResponse resp = cloudClient.query(COLLECTION, params);
assertEquals(0, resp.getResults().getNumFound());
ChaosMonkey.stop(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
ChaosMonkey.start(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000));
resp = cloudClient.query(COLLECTION, params);
assertEquals(4, resp.getResults().getNumFound());
// Make sure all nodes is recover from tlog
if (onlyLeaderIndexes) {
// Leader election can be kicked off, so 2 tlog replicas will replay its tlog before becoming new leader
assertTrue(countReplayLog.get() >= 2);
} else {
assertEquals(4, countReplayLog.get());
}
// check metrics
int replicationCount = 0;
int errorsCount = 0;
int skippedCount = 0;
for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
SolrMetricManager manager = jetty.getCoreContainer().getMetricManager();
List<String> registryNames = manager.registryNames().stream().filter(s -> s.startsWith("solr.core.")).collect(Collectors.toList());
for (String registry : registryNames) {
Map<String, Metric> metrics = manager.registry(registry).getMetrics();
Timer timer = (Timer) metrics.get("REPLICATION.peerSync.time");
Counter counter = (Counter) metrics.get("REPLICATION.peerSync.errors");
Counter skipped = (Counter) metrics.get("REPLICATION.peerSync.skipped");
replicationCount += timer.getCount();
errorsCount += counter.getCount();
skippedCount += skipped.getCount();
}
}
if (onlyLeaderIndexes) {
assertTrue(replicationCount >= 2);
} else {
assertEquals(2, replicationCount);
}
}
use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.
the class HdfsDirectoryFactoryTest method testLocalityReporter.
@Test
public void testLocalityReporter() throws Exception {
Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
conf.set("dfs.permissions.enabled", "false");
Random r = random();
HdfsDirectoryFactory factory = new HdfsDirectoryFactory();
SolrMetricManager metricManager = new SolrMetricManager();
String registry = TestUtil.randomSimpleString(r, 2, 10);
String scope = TestUtil.randomSimpleString(r, 2, 10);
Map<String, String> props = new HashMap<String, String>();
props.put(HdfsDirectoryFactory.HDFS_HOME, HdfsTestUtil.getURI(dfsCluster) + "/solr");
props.put(HdfsDirectoryFactory.BLOCKCACHE_ENABLED, "false");
props.put(HdfsDirectoryFactory.NRTCACHINGDIRECTORY_ENABLE, "false");
props.put(HdfsDirectoryFactory.LOCALITYMETRICS_ENABLED, "true");
factory.init(new NamedList<>(props));
factory.initializeMetrics(metricManager, registry, scope);
// get the metrics map for the locality bean
MetricsMap metrics = (MetricsMap) metricManager.registry(registry).getMetrics().get("OTHER." + scope + ".hdfsLocality");
// We haven't done anything, so there should be no data
Map<String, Object> statistics = metrics.getValue();
assertEquals("Saw bytes that were not written: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL), 0l, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL));
assertEquals("Counted bytes as local when none written: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_RATIO), 0, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_RATIO));
// create a directory and a file
String path = HdfsTestUtil.getURI(dfsCluster) + "/solr3/";
Directory dir = factory.create(path, NoLockFactory.INSTANCE, DirContext.DEFAULT);
try (IndexOutput writer = dir.createOutput("output", null)) {
writer.writeLong(42l);
}
final long long_bytes = Long.SIZE / Byte.SIZE;
// no locality because hostname not set
factory.setHost("bogus");
statistics = metrics.getValue();
assertEquals("Wrong number of total bytes counted: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL), long_bytes, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL));
assertEquals("Wrong number of total blocks counted: " + statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_TOTAL), 1, statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_TOTAL));
assertEquals("Counted block as local when bad hostname set: " + statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_LOCAL), 0, statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_LOCAL));
// set hostname and check again
factory.setHost("127.0.0.1");
statistics = metrics.getValue();
assertEquals("Did not count block as local after setting hostname: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_LOCAL), long_bytes, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_LOCAL));
factory.close();
}
use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.
the class RequestHandlersTest method testInitCount.
@Test
public void testInitCount() {
String registry = h.getCore().getCoreMetricManager().getRegistryName();
SolrMetricManager manager = h.getCoreContainer().getMetricManager();
Gauge<Number> g = (Gauge<Number>) manager.registry(registry).getMetrics().get("QUERY.mock.initCount");
assertEquals("Incorrect init count", 1, g.getValue().intValue());
}
use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.
the class TestRandomRequestDistribution method testQueryAgainstDownReplica.
/**
* Asserts that requests against a collection are only served by a 'active' local replica
*/
private void testQueryAgainstDownReplica() throws Exception {
log.info("Creating collection 'football' with 1 shard and 2 replicas");
CollectionAdminRequest.createCollection("football", 1, 2).setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1)).process(cloudClient);
waitForRecoveriesToFinish("football", true);
cloudClient.getZkStateReader().forceUpdateCollection("football");
Replica leader = null;
Replica notLeader = null;
Collection<Replica> replicas = cloudClient.getZkStateReader().getClusterState().getSlice("football", "shard1").getReplicas();
for (Replica replica : replicas) {
if (replica.getStr(ZkStateReader.LEADER_PROP) != null) {
leader = replica;
} else {
notLeader = replica;
}
}
//Simulate a replica being in down state.
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, notLeader.getStr(ZkStateReader.BASE_URL_PROP), ZkStateReader.NODE_NAME_PROP, notLeader.getStr(ZkStateReader.NODE_NAME_PROP), ZkStateReader.COLLECTION_PROP, "football", ZkStateReader.SHARD_ID_PROP, "shard1", ZkStateReader.CORE_NAME_PROP, notLeader.getStr(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
DistributedQueue q = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
q.offer(Utils.toJSON(m));
verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);
//Query against the node which hosts the down replica
String baseUrl = notLeader.getStr(ZkStateReader.BASE_URL_PROP);
if (!baseUrl.endsWith("/"))
baseUrl += "/";
String path = baseUrl + "football";
log.info("Firing queries against path=" + path);
try (HttpSolrClient client = getHttpSolrClient(path)) {
client.setSoTimeout(5000);
client.setConnectionTimeout(2000);
SolrCore leaderCore = null;
for (JettySolrRunner jetty : jettys) {
CoreContainer container = jetty.getCoreContainer();
for (SolrCore core : container.getCores()) {
if (core.getName().equals(leader.getStr(ZkStateReader.CORE_NAME_PROP))) {
leaderCore = core;
break;
}
}
}
assertNotNull(leaderCore);
SolrMetricManager leaderMetricManager = leaderCore.getCoreContainer().getMetricManager();
String leaderRegistry = leaderCore.getCoreMetricManager().getRegistryName();
Counter cnt = leaderMetricManager.counter(null, leaderRegistry, "requests", "QUERY.standard");
// All queries should be served by the active replica
// To make sure that's true we keep querying the down replica
// If queries are getting processed by the down replica then the cluster state hasn't updated for that replica
// locally
// So we keep trying till it has updated and then verify if ALL queries go to the active replica
long count = 0;
while (true) {
count++;
client.query(new SolrQuery("*:*"));
long c = cnt.getCount();
if (c == 1) {
// cluster state has got update locally
break;
} else {
Thread.sleep(100);
}
if (count > 10000) {
fail("After 10k queries we still see all requests being processed by the down replica");
}
}
// Now we fire a few additional queries and make sure ALL of them
// are served by the active replica
int moreQueries = TestUtil.nextInt(random(), 4, 10);
// Since 1 query has already hit the leader
count = 1;
for (int i = 0; i < moreQueries; i++) {
client.query(new SolrQuery("*:*"));
count++;
long c = cnt.getCount();
assertEquals("Query wasn't served by leader", count, c);
}
}
}
use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.
the class TestRandomRequestDistribution method testRequestTracking.
/**
* Asserts that requests aren't always sent to the same poor node. See SOLR-7493
*/
private void testRequestTracking() throws Exception {
CollectionAdminRequest.createCollection("a1x2", 1, 2).setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1)).process(cloudClient);
CollectionAdminRequest.createCollection("b1x1", 1, 1).setCreateNodeSet(nodeNames.get(2)).process(cloudClient);
waitForRecoveriesToFinish("a1x2", true);
waitForRecoveriesToFinish("b1x1", true);
cloudClient.getZkStateReader().forceUpdateCollection("b1x1");
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
DocCollection b1x1 = clusterState.getCollection("b1x1");
Collection<Replica> replicas = b1x1.getSlice("shard1").getReplicas();
assertEquals(1, replicas.size());
String baseUrl = replicas.iterator().next().getStr(ZkStateReader.BASE_URL_PROP);
if (!baseUrl.endsWith("/"))
baseUrl += "/";
try (HttpSolrClient client = getHttpSolrClient(baseUrl + "a1x2")) {
client.setSoTimeout(5000);
client.setConnectionTimeout(2000);
log.info("Making requests to " + baseUrl + "a1x2");
for (int i = 0; i < 10; i++) {
client.query(new SolrQuery("*:*"));
}
}
Map<String, Integer> shardVsCount = new HashMap<>();
for (JettySolrRunner runner : jettys) {
CoreContainer container = runner.getCoreContainer();
SolrMetricManager metricManager = container.getMetricManager();
for (SolrCore core : container.getCores()) {
String registry = core.getCoreMetricManager().getRegistryName();
Counter cnt = metricManager.counter(null, registry, "requests", "QUERY.standard");
SolrRequestHandler select = core.getRequestHandler("");
// long c = (long) select.getStatistics().get("requests");
shardVsCount.put(core.getName(), (int) cnt.getCount());
}
}
log.info("Shard count map = " + shardVsCount);
for (Map.Entry<String, Integer> entry : shardVsCount.entrySet()) {
assertTrue("Shard " + entry.getKey() + " received all 10 requests", entry.getValue() != 10);
}
}
Aggregations