Search in sources :

Example 1 with SolrMetricManager

use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.

the class TestCloudRecovery method leaderRecoverFromLogOnStartupTest.

/**
 * Adds four documents with {@code DirectUpdateHandler2.commitOnClose} switched off, stops and
 * restarts every Jetty runner of the cluster, and then checks that all four documents are
 * searchable, how many transaction-log replays finished, and how many peer-sync replications
 * the per-core metric registries recorded.
 */
@Test
public void leaderRecoverFromLogOnStartupTest() throws Exception {
    AtomicInteger finishedReplays = new AtomicInteger(0);
    DirectUpdateHandler2.commitOnClose = false;
    // Count every finished log replay through the UpdateLog testing hook.
    UpdateLog.testing_logReplayFinishHook = finishedReplays::incrementAndGet;

    CloudSolrClient client = cluster.getSolrClient();
    for (int docId = 1; docId <= 4; docId++) {
        client.add(COLLECTION, sdoc("id", String.valueOf(docId)));
    }

    ModifiableSolrParams matchAll = new ModifiableSolrParams();
    matchAll.set("q", "*:*");
    // No commit has been issued in this method, so the test expects zero visible documents.
    QueryResponse response = client.query(COLLECTION, matchAll);
    assertEquals(0, response.getResults().getNumFound());

    // Take every node down, wait until no replica is live, then bring them all back.
    ChaosMonkey.stop(cluster.getJettySolrRunners());
    boolean allDown = ClusterStateUtil.waitForAllReplicasNotLive(client.getZkStateReader(), 45000);
    assertTrue("Timeout waiting for all not live", allDown);
    ChaosMonkey.start(cluster.getJettySolrRunners());
    boolean allUp = ClusterStateUtil.waitForAllActiveAndLiveReplicas(client.getZkStateReader(), COLLECTION, 120000);
    assertTrue("Timeout waiting for all live and active", allUp);

    response = client.query(COLLECTION, matchAll);
    assertEquals(4, response.getResults().getNumFound());

    // Make sure all nodes recovered from the tlog.
    if (!onlyLeaderIndexes) {
        assertEquals(4, finishedReplays.get());
    } else {
        // Leader election can be kicked off, so 2 tlog replicas will replay their tlog
        // before becoming the new leader.
        assertTrue(finishedReplays.get() >= 2);
    }

    // Sum the peer-sync metrics over every "solr.core." registry of every node.
    int peerSyncs = 0;
    int peerSyncErrors = 0;
    int peerSyncSkips = 0;
    for (JettySolrRunner runner : cluster.getJettySolrRunners()) {
        SolrMetricManager metricManager = runner.getCoreContainer().getMetricManager();
        List<String> coreRegistries = metricManager.registryNames().stream()
            .filter(name -> name.startsWith("solr.core."))
            .collect(Collectors.toList());
        for (String coreRegistry : coreRegistries) {
            Map<String, Metric> coreMetrics = metricManager.registry(coreRegistry).getMetrics();
            Timer syncTimer = (Timer) coreMetrics.get("REPLICATION.peerSync.time");
            Counter syncErrors = (Counter) coreMetrics.get("REPLICATION.peerSync.errors");
            Counter syncSkipped = (Counter) coreMetrics.get("REPLICATION.peerSync.skipped");
            peerSyncs += syncTimer.getCount();
            peerSyncErrors += syncErrors.getCount();
            peerSyncSkips += syncSkipped.getCount();
        }
    }

    // NOTE(review): the error and skipped totals are accumulated but never asserted.
    if (!onlyLeaderIndexes) {
        assertEquals(2, peerSyncs);
    } else {
        assertTrue(peerSyncs >= 2);
    }
}
Also used : Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) HashMap(java.util.HashMap) DirectUpdateHandler2(org.apache.solr.update.DirectUpdateHandler2) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ClusterStateUtil(org.apache.solr.common.cloud.ClusterStateUtil) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Counter(com.codahale.metrics.Counter) UpdateLog(org.apache.solr.update.UpdateLog) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) Before(org.junit.Before) AfterClass(org.junit.AfterClass) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrCore(org.apache.solr.core.SolrCore) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) FileOutputStream(java.io.FileOutputStream) Metric(com.codahale.metrics.Metric) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) File(java.io.File) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Timer(com.codahale.metrics.Timer) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager) TestInjection(org.apache.solr.util.TestInjection) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) Counter(com.codahale.metrics.Counter) Timer(com.codahale.metrics.Timer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager) Metric(com.codahale.metrics.Metric) Test(org.junit.Test)

Example 2 with SolrMetricManager

use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.

the class HdfsDirectoryFactoryTest method testLocalityReporter.

/**
 * Exercises the locality metrics that {@link HdfsDirectoryFactory} registers under
 * {@code "OTHER." + scope + ".hdfsLocality"}: the totals start at zero, grow after one long is
 * written, and the local counters only change once the factory's host is set to
 * {@code 127.0.0.1}.
 *
 * @throws Exception if the factory, the directory or the metrics lookup fails
 */
@Test
public void testLocalityReporter() throws Exception {
    Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
    conf.set("dfs.permissions.enabled", "false");
    Random r = random();
    HdfsDirectoryFactory factory = new HdfsDirectoryFactory();
    SolrMetricManager metricManager = new SolrMetricManager();
    String registry = TestUtil.randomSimpleString(r, 2, 10);
    String scope = TestUtil.randomSimpleString(r, 2, 10);
    Map<String, String> props = new HashMap<>();
    props.put(HdfsDirectoryFactory.HDFS_HOME, HdfsTestUtil.getURI(dfsCluster) + "/solr");
    props.put(HdfsDirectoryFactory.BLOCKCACHE_ENABLED, "false");
    props.put(HdfsDirectoryFactory.NRTCACHINGDIRECTORY_ENABLE, "false");
    props.put(HdfsDirectoryFactory.LOCALITYMETRICS_ENABLED, "true");
    factory.init(new NamedList<>(props));
    try {
        factory.initializeMetrics(metricManager, registry, scope);
        // get the metrics map for the locality bean
        MetricsMap metrics = (MetricsMap) metricManager.registry(registry).getMetrics().get("OTHER." + scope + ".hdfsLocality");
        // We haven't done anything, so there should be no data
        Map<String, Object> statistics = metrics.getValue();
        assertEquals("Saw bytes that were not written: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL), 0L, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL));
        assertEquals("Counted bytes as local when none written: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_RATIO), 0, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_RATIO));
        // create a directory and a file
        String path = HdfsTestUtil.getURI(dfsCluster) + "/solr3/";
        Directory dir = factory.create(path, NoLockFactory.INSTANCE, DirContext.DEFAULT);
        try (IndexOutput writer = dir.createOutput("output", null)) {
            writer.writeLong(42L);
        }
        // size in bytes of the single long written above
        final long long_bytes = Long.SIZE / Byte.SIZE;
        // no locality because hostname not set
        factory.setHost("bogus");
        statistics = metrics.getValue();
        assertEquals("Wrong number of total bytes counted: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL), long_bytes, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_TOTAL));
        assertEquals("Wrong number of total blocks counted: " + statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_TOTAL), 1, statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_TOTAL));
        assertEquals("Counted block as local when bad hostname set: " + statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_LOCAL), 0, statistics.get(HdfsLocalityReporter.LOCALITY_BLOCKS_LOCAL));
        // set hostname and check again
        factory.setHost("127.0.0.1");
        statistics = metrics.getValue();
        assertEquals("Did not count block as local after setting hostname: " + statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_LOCAL), long_bytes, statistics.get(HdfsLocalityReporter.LOCALITY_BYTES_LOCAL));
    } finally {
        // Close the factory even when one of the assertions above fails, so a failing
        // run does not leave it open.
        factory.close();
    }
}
Also used : MetricsMap(org.apache.solr.metrics.MetricsMap) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) IndexOutput(org.apache.lucene.store.IndexOutput) Random(java.util.Random) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Example 3 with SolrMetricManager

use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.

the class RequestHandlersTest method testInitCount.

/**
 * Looks up the {@code QUERY.mock.initCount} gauge in the core's metric registry and expects
 * its value to be exactly 1.
 */
@Test
public void testInitCount() {
    String registryName = h.getCore().getCoreMetricManager().getRegistryName();
    SolrMetricManager metricManager = h.getCoreContainer().getMetricManager();
    Object initCountMetric = metricManager.registry(registryName).getMetrics().get("QUERY.mock.initCount");
    // The registry hands back an untyped metric; the test treats it as a numeric gauge.
    Gauge<Number> initCountGauge = (Gauge<Number>) initCountMetric;
    int observedInitCount = initCountGauge.getValue().intValue();
    assertEquals("Incorrect init count", 1, observedInitCount);
}
Also used : SolrMetricManager(org.apache.solr.metrics.SolrMetricManager) Gauge(com.codahale.metrics.Gauge) Test(org.junit.Test)

Example 4 with SolrMetricManager

use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.

the class TestRandomRequestDistribution method testQueryAgainstDownReplica.

/**
 * Asserts that requests against a collection are only served by an 'active' local replica.
 *
 * <p>Creates the 'football' collection (1 shard, 2 replicas), pushes a state message onto the
 * Overseer state update queue that marks the non-leader replica as DOWN, and then queries the
 * node hosting that replica. The leader core's {@code QUERY.standard} request counter is used
 * to check which core served each query.
 *
 * @throws Exception if collection creation, the state update or any query fails
 */
private void testQueryAgainstDownReplica() throws Exception {
    log.info("Creating collection 'football' with 1 shard and 2 replicas");
    CollectionAdminRequest.createCollection("football", 1, 2).setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1)).process(cloudClient);
    waitForRecoveriesToFinish("football", true);
    cloudClient.getZkStateReader().forceUpdateCollection("football");
    Replica leader = null;
    Replica notLeader = null;
    Collection<Replica> replicas = cloudClient.getZkStateReader().getClusterState().getSlice("football", "shard1").getReplicas();
    for (Replica replica : replicas) {
        if (replica.getStr(ZkStateReader.LEADER_PROP) != null) {
            leader = replica;
        } else {
            notLeader = replica;
        }
    }
    // Fail with a readable message instead of a NullPointerException further down
    // when the cluster state does not contain both kinds of replica.
    assertNotNull("No leader replica found for football/shard1", leader);
    assertNotNull("No non-leader replica found for football/shard1", notLeader);
    //Simulate a replica being in down state.
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, notLeader.getStr(ZkStateReader.BASE_URL_PROP), ZkStateReader.NODE_NAME_PROP, notLeader.getStr(ZkStateReader.NODE_NAME_PROP), ZkStateReader.COLLECTION_PROP, "football", ZkStateReader.SHARD_ID_PROP, "shard1", ZkStateReader.CORE_NAME_PROP, notLeader.getStr(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
    log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
    DistributedQueue q = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
    q.offer(Utils.toJSON(m));
    verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);
    //Query against the node which hosts the down replica
    String baseUrl = notLeader.getStr(ZkStateReader.BASE_URL_PROP);
    if (!baseUrl.endsWith("/")) {
        baseUrl += "/";
    }
    String path = baseUrl + "football";
    log.info("Firing queries against path={}", path);
    try (HttpSolrClient client = getHttpSolrClient(path)) {
        client.setSoTimeout(5000);
        client.setConnectionTimeout(2000);
        // Locate the SolrCore object that backs the leader replica.
        String leaderCoreName = leader.getStr(ZkStateReader.CORE_NAME_PROP);
        SolrCore leaderCore = null;
        for (JettySolrRunner jetty : jettys) {
            CoreContainer container = jetty.getCoreContainer();
            for (SolrCore core : container.getCores()) {
                if (core.getName().equals(leaderCoreName)) {
                    leaderCore = core;
                    break;
                }
            }
        }
        assertNotNull(leaderCore);
        SolrMetricManager leaderMetricManager = leaderCore.getCoreContainer().getMetricManager();
        String leaderRegistry = leaderCore.getCoreMetricManager().getRegistryName();
        Counter cnt = leaderMetricManager.counter(null, leaderRegistry, "requests", "QUERY.standard");
        // All queries should be served by the active replica
        // To make sure that's true we keep querying the down replica
        // If queries are getting processed by the down replica then the cluster state hasn't updated for that replica
        // locally
        // So we keep trying till it has updated and then verify if ALL queries go to the active replica
        long count = 0;
        while (true) {
            count++;
            client.query(new SolrQuery("*:*"));
            long c = cnt.getCount();
            if (c == 1) {
                // cluster state has got update locally
                break;
            } else {
                Thread.sleep(100);
            }
            if (count > 10000) {
                fail("After 10k queries we still see all requests being processed by the down replica");
            }
        }
        // Now we fire a few additional queries and make sure ALL of them
        // are served by the active replica
        int moreQueries = TestUtil.nextInt(random(), 4, 10);
        // Since 1 query has already hit the leader
        count = 1;
        for (int i = 0; i < moreQueries; i++) {
            client.query(new SolrQuery("*:*"));
            count++;
            long c = cnt.getCount();
            assertEquals("Query wasn't served by leader", count, c);
        }
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) Replica(org.apache.solr.common.cloud.Replica) SolrQuery(org.apache.solr.client.solrj.SolrQuery) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) Counter(com.codahale.metrics.Counter) CoreContainer(org.apache.solr.core.CoreContainer) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager)

Example 5 with SolrMetricManager

use of org.apache.solr.metrics.SolrMetricManager in project lucene-solr by apache.

the class TestRandomRequestDistribution method testRequestTracking.

/**
 * Asserts that requests aren't always sent to the same poor node. See SOLR-7493
 *
 * <p>Creates collection 'a1x2' (1 shard, 2 replicas) on the first two nodes and 'b1x1'
 * (1 shard, 1 replica) on the third, sends 10 queries for 'a1x2' through the base URL of the
 * 'b1x1' replica, and then checks via each core's {@code QUERY.standard} request counter
 * that no single core received all 10 of them.
 *
 * @throws Exception if collection creation or any query fails
 */
private void testRequestTracking() throws Exception {
    CollectionAdminRequest.createCollection("a1x2", 1, 2).setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1)).process(cloudClient);
    CollectionAdminRequest.createCollection("b1x1", 1, 1).setCreateNodeSet(nodeNames.get(2)).process(cloudClient);
    waitForRecoveriesToFinish("a1x2", true);
    waitForRecoveriesToFinish("b1x1", true);
    cloudClient.getZkStateReader().forceUpdateCollection("b1x1");
    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    DocCollection b1x1 = clusterState.getCollection("b1x1");
    Collection<Replica> replicas = b1x1.getSlice("shard1").getReplicas();
    assertEquals(1, replicas.size());
    String baseUrl = replicas.iterator().next().getStr(ZkStateReader.BASE_URL_PROP);
    if (!baseUrl.endsWith("/")) {
        baseUrl += "/";
    }
    // Address collection 'a1x2' through the base URL taken from the 'b1x1' replica.
    String requestUrl = baseUrl + "a1x2";
    try (HttpSolrClient client = getHttpSolrClient(requestUrl)) {
        client.setSoTimeout(5000);
        client.setConnectionTimeout(2000);
        log.info("Making requests to {}", requestUrl);
        for (int i = 0; i < 10; i++) {
            client.query(new SolrQuery("*:*"));
        }
    }
    // Collect the number of standard query requests each core has seen.
    Map<String, Integer> shardVsCount = new HashMap<>();
    for (JettySolrRunner runner : jettys) {
        CoreContainer container = runner.getCoreContainer();
        SolrMetricManager metricManager = container.getMetricManager();
        for (SolrCore core : container.getCores()) {
            String registry = core.getCoreMetricManager().getRegistryName();
            Counter cnt = metricManager.counter(null, registry, "requests", "QUERY.standard");
            shardVsCount.put(core.getName(), (int) cnt.getCount());
        }
    }
    log.info("Shard count map = {}", shardVsCount);
    for (Map.Entry<String, Integer> entry : shardVsCount.entrySet()) {
        assertTrue("Shard " + entry.getKey() + " received all 10 requests", entry.getValue() != 10);
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) SolrCore(org.apache.solr.core.SolrCore) Replica(org.apache.solr.common.cloud.Replica) SolrQuery(org.apache.solr.client.solrj.SolrQuery) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) Counter(com.codahale.metrics.Counter) CoreContainer(org.apache.solr.core.CoreContainer) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager) DocCollection(org.apache.solr.common.cloud.DocCollection) HashMap(java.util.HashMap) Map(java.util.Map) SolrRequestHandler(org.apache.solr.request.SolrRequestHandler)

Aggregations

SolrMetricManager (org.apache.solr.metrics.SolrMetricManager) 29, Test (org.junit.Test) 13, MetricsMap (org.apache.solr.metrics.MetricsMap) 9, Map (java.util.Map) 8, CoreContainer (org.apache.solr.core.CoreContainer) 8, MetricRegistry (com.codahale.metrics.MetricRegistry) 6, Metric (com.codahale.metrics.Metric) 5, Set (java.util.Set) 5, JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner) 5, SolrException (org.apache.solr.common.SolrException) 5, SolrCore (org.apache.solr.core.SolrCore) 5, Counter (com.codahale.metrics.Counter) 4, IOException (java.io.IOException) 4, MethodHandles (java.lang.invoke.MethodHandles) 4, Path (java.nio.file.Path) 4, ArrayList (java.util.ArrayList) 4, HashMap (java.util.HashMap) 4, HashSet (java.util.HashSet) 4, List (java.util.List) 4, Random (java.util.Random) 4