Search in sources :

Example 1 with CheckIndex

use of org.apache.lucene.index.CheckIndex in project elasticsearch by elastic.

the class MockFSDirectoryService method checkIndex.

public static void checkIndex(Logger logger, Store store, ShardId shardId) {
    if (store.tryIncRef()) {
        logger.info("start check index");
        try {
            Directory dir = store.directory();
            if (!Lucene.indexExists(dir)) {
                return;
            }
            try (CheckIndex checkIndex = new CheckIndex(dir)) {
                BytesStreamOutput os = new BytesStreamOutput();
                PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                checkIndex.setInfoStream(out);
                out.flush();
                CheckIndex.Status status = checkIndex.checkIndex();
                if (!status.clean) {
                    ESTestCase.checkIndexFailed = true;
                    logger.warn("check index [failure] index files={}\n{}", Arrays.toString(dir.listAll()), os.bytes().utf8ToString());
                    throw new IOException("index check failure");
                } else {
                    if (logger.isDebugEnabled()) {
                        logger.debug("check index [success]\n{}", os.bytes().utf8ToString());
                    }
                }
            } catch (LockObtainFailedException e) {
                ESTestCase.checkIndexFailed = true;
                throw new IllegalStateException("IndexWriter is still open on shard " + shardId, e);
            }
        } catch (Exception e) {
            logger.warn("failed to check index", e);
        } finally {
            logger.info("end check index");
            store.decRef();
        }
    }
}
Also used : PrintStream(java.io.PrintStream) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IOException(java.io.IOException) CheckIndex(org.apache.lucene.index.CheckIndex) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IOException(java.io.IOException) Directory(org.apache.lucene.store.Directory)

Example 2 with CheckIndex

use of org.apache.lucene.index.CheckIndex in project elasticsearch by elastic.

the class IndexShard method doCheckIndex.

private void doCheckIndex() throws IOException {
    long timeNS = System.nanoTime();
    if (!Lucene.indexExists(store.directory())) {
        return;
    }
    BytesStreamOutput os = new BytesStreamOutput();
    PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
    if ("checksum".equals(checkIndexOnStartup)) {
        // physical verification only: verify all checksums for the latest commit
        IOException corrupt = null;
        MetadataSnapshot metadata = snapshotStoreMetadata();
        for (Map.Entry<String, StoreFileMetaData> entry : metadata.asMap().entrySet()) {
            try {
                Store.checkIntegrity(entry.getValue(), store.directory());
                out.println("checksum passed: " + entry.getKey());
            } catch (IOException exc) {
                out.println("checksum failed: " + entry.getKey());
                exc.printStackTrace(out);
                corrupt = exc;
            }
        }
        out.flush();
        if (corrupt != null) {
            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
            throw corrupt;
        }
    } else {
        // full checkindex
        try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
            checkIndex.setInfoStream(out);
            CheckIndex.Status status = checkIndex.checkIndex();
            out.flush();
            if (!status.clean) {
                if (state == IndexShardState.CLOSED) {
                    // ignore if closed....
                    return;
                }
                logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                if ("fix".equals(checkIndexOnStartup)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("fixing index, writing new segments file ...");
                    }
                    checkIndex.exorciseIndex(status);
                    if (logger.isDebugEnabled()) {
                        logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
                    }
                } else {
                    // only throw a failure if we are not going to fix the index
                    throw new IllegalStateException("index check failure but can't fix it");
                }
            }
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug("check index [success]\n{}", os.bytes().utf8ToString());
    }
    recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS)));
}
Also used : PrintStream(java.io.PrintStream) StoreFileMetaData(org.elasticsearch.index.store.StoreFileMetaData) IOException(java.io.IOException) Map(java.util.Map) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) CheckIndex(org.apache.lucene.index.CheckIndex) MetadataSnapshot(org.elasticsearch.index.store.Store.MetadataSnapshot)

Example 3 with CheckIndex

use of org.apache.lucene.index.CheckIndex in project elasticsearch by elastic.

the class CorruptedFileIT method testReplicaCorruption.

/**
     * This test verifies that if we corrupt a replica, we can still get to green, even though
     * listing its store fails. Note, we need to make sure that replicas are allocated on all data
     * nodes, so that replica won't be sneaky and allocated on a node that doesn't have a corrupted
     * replica.
     */
public void testReplicaCorruption() throws Exception {
    int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, cluster().numDataNodes() - 1).put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
    false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
    new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    // disable allocations of replicas post restart (the restart will change replicas to primaries, so we have
    // to capture replicas post restart)
    assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "primaries")));
    internalCluster().fullRestart();
    ensureYellow();
    final Index index = resolveIndex("test");
    final IndicesShardStoresResponse stores = client().admin().indices().prepareShardStores(index.getName()).get();
    for (IntObjectCursor<List<IndicesShardStoresResponse.StoreStatus>> shards : stores.getStoreStatuses().get(index.getName())) {
        for (IndicesShardStoresResponse.StoreStatus store : shards.value) {
            final ShardId shardId = new ShardId(index, shards.key);
            if (store.getAllocationStatus().equals(IndicesShardStoresResponse.StoreStatus.AllocationStatus.UNUSED)) {
                for (Path path : findFilesToCorruptOnNode(store.getNode().getName(), shardId)) {
                    try (OutputStream os = Files.newOutputStream(path)) {
                        os.write(0);
                    }
                    logger.info("corrupting file {} on node {}", path, store.getNode().getName());
                }
            }
        }
    }
    // enable allocation
    assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder().putNull(EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey())));
    ensureGreen();
}
Also used : Path(java.nio.file.Path) IndicesShardStoresResponse(org.elasticsearch.action.admin.indices.shards.IndicesShardStoresResponse) OutputStream(java.io.OutputStream) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) CheckIndex(org.apache.lucene.index.CheckIndex) Index(org.elasticsearch.index.Index) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ShardId(org.elasticsearch.index.shard.ShardId) List(java.util.List) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CollectionUtils.iterableAsArrayList(org.elasticsearch.common.util.CollectionUtils.iterableAsArrayList)

Example 4 with CheckIndex

use of org.apache.lucene.index.CheckIndex in project elasticsearch by elastic.

the class CorruptedFileIT method testCorruptFileAndRecover.

/**
     * Tests that we can actually recover from a corruption on the primary given that we have replica shards around.
     */
public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    // have enough space for 3 copies
    internalCluster().ensureAtLeastNumDataNodes(3);
    if (cluster().numDataNodes() == 3) {
        logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
    }
    assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
    false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
    new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    disableAllocation("test");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    final int numShards = numShards("test");
    ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
    logger.info("--> {} corrupted", corruptedShardRouting);
    enableAllocation("test");
    /*
         * we corrupted the primary shard - now lets make sure we never recover from it successfully
         */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    ClusterHealthResponse health = client().admin().cluster().health(Requests.clusterHealthRequest("test").waitForGreenStatus().timeout(// sometimes due to cluster rebalacing and random settings default timeout is just not enough.
    "5m").waitForNoRelocatingShards(true)).actionGet();
    if (health.isTimedOut()) {
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
        assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
    }
    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
        SearchResponse response = client().prepareSearch().setSize(numDocs).get();
        assertHitCount(response, numDocs);
    }
    /*
         * now hook into the IndicesService and register a close listener to
         * run the checkindex. if the corruption is still there we will catch it.
         */
    // primary + 2 replicas
    final CountDownLatch latch = new CountDownLatch(numShards * 3);
    final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
    final IndexEventListener listener = new IndexEventListener() {

        @Override
        public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
            if (indexShard != null) {
                Store store = indexShard.store();
                store.incRef();
                try {
                    if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) {
                        return;
                    }
                    try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
                        BytesStreamOutput os = new BytesStreamOutput();
                        PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                        checkIndex.setInfoStream(out);
                        out.flush();
                        CheckIndex.Status status = checkIndex.checkIndex();
                        if (!status.clean) {
                            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                            throw new IOException("index check failure");
                        }
                    }
                } catch (Exception e) {
                    exception.add(e);
                } finally {
                    store.decRef();
                    latch.countDown();
                }
            }
        }
    };
    for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(listener);
    }
    try {
        client().admin().indices().prepareDelete("test").get();
        latch.await();
        assertThat(exception, empty());
    } finally {
        for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
            eventListener.setNewDelegate(null);
        }
    }
}
Also used : MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) PrintStream(java.io.PrintStream) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) IndexShard(org.elasticsearch.index.shard.IndexShard) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) MockFSIndexStore(org.elasticsearch.test.store.MockFSIndexStore) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) TransportException(org.elasticsearch.transport.TransportException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ShardId(org.elasticsearch.index.shard.ShardId) MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) IndexEventListener(org.elasticsearch.index.shard.IndexEventListener) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) Nullable(org.elasticsearch.common.Nullable) CheckIndex(org.apache.lucene.index.CheckIndex) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 5 with CheckIndex

use of org.apache.lucene.index.CheckIndex in project lucene-solr by apache.

the class TestUtil method checkIndex.

/** If failFast is true, then throw the first exception when index corruption is hit, instead of moving on to other fields/segments to
   *  look for any other corruption.  */
public static CheckIndex.Status checkIndex(Directory dir, boolean crossCheckTermVectors, boolean failFast, ByteArrayOutputStream output) throws IOException {
    if (output == null) {
        output = new ByteArrayOutputStream(1024);
    }
    // some tests e.g. exception tests become much more complicated if they have to close the writer
    try (CheckIndex checker = new CheckIndex(dir, NoLockFactory.INSTANCE.obtainLock(dir, "bogus"))) {
        checker.setCrossCheckTermVectors(crossCheckTermVectors);
        checker.setFailFast(failFast);
        checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false);
        CheckIndex.Status indexStatus = checker.checkIndex(null);
        if (indexStatus == null || indexStatus.clean == false) {
            System.out.println("CheckIndex failed");
            System.out.println(output.toString(IOUtils.UTF_8));
            throw new RuntimeException("CheckIndex failed");
        } else {
            if (LuceneTestCase.INFOSTREAM) {
                System.out.println(output.toString(IOUtils.UTF_8));
            }
            return indexStatus;
        }
    }
}
Also used : PrintStream(java.io.PrintStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CheckIndex(org.apache.lucene.index.CheckIndex)

Aggregations

CheckIndex (org.apache.lucene.index.CheckIndex)13 PrintStream (java.io.PrintStream)8 IOException (java.io.IOException)7 Directory (org.apache.lucene.store.Directory)7 BytesStreamOutput (org.elasticsearch.common.io.stream.BytesStreamOutput)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 File (java.io.File)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 DirectoryReader (org.apache.lucene.index.DirectoryReader)2 FSDirectory (org.apache.lucene.store.FSDirectory)2 LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException)2 IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder)2 SearchResponse (org.elasticsearch.action.search.SearchResponse)2 ByteSizeValue (org.elasticsearch.common.unit.ByteSizeValue)2 ShardId (org.elasticsearch.index.shard.ShardId)2 MetadataSnapshot (org.elasticsearch.index.store.Store.MetadataSnapshot)2 ObjectLongMap (com.carrotsearch.hppc.ObjectLongMap)1 NotStoredException (io.anserini.index.NotStoredException)1