Search in sources :

Example 1 with FlatFileStore

use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.

the class DocumentStoreIndexer method reindex.

/**
 * Reindexes the configured indexes by first dumping the repository content into a
 * {@link FlatFileStore} and then feeding every entry of that store to the indexers.
 *
 * <p>All index changes are accumulated in a {@link MemoryNodeStore} seeded with the
 * checkpointed state and merged there once the traversal has finished. If no indexer
 * is applicable the method returns without traversing anything.
 *
 * @throws CommitFailedException declared for the indexing and merge steps
 * @throws IOException declared for the flat file store build and the indexing steps
 */
public void reindex() throws CommitFailedException, IOException {
    configureEstimators();

    // Seed an in-memory store with the checkpointed state and prepare the indexers on it.
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore inMemoryStore = new MemoryNodeStore(checkpointedState);
    indexerSupport.switchIndexLanesAndReindexFlag(inMemoryStore);
    NodeBuilder rootBuilder = inMemoryStore.getRoot().builder();
    CompositeIndexer compositeIndexer = prepareIndexers(inMemoryStore, rootBuilder);
    if (compositeIndexer.isEmpty()) {
        // Nothing to index, so skip the (expensive) traversal altogether.
        return;
    }
    closer.register(compositeIndexer);

    // TODO How to ensure we can safely read from secondary
    DocumentNodeState documentRoot = (DocumentNodeState) checkpointedState;
    DocumentNodeStore documentNodeStore = (DocumentNodeStore) indexHelper.getNodeStore();
    NodeStateEntryTraverser traverser =
            new NodeStateEntryTraverser(documentRoot.getRootRevision(), documentNodeStore, getMongoDocumentStore())
                    .withProgressCallback(this::reportDocumentRead)
                    .withPathPredicate(compositeIndexer::shouldInclude);
    closer.register(traverser);

    // The first traversal only dumps content, hence the different message prefix.
    progressReporter.setMessagePrefix("Dumping");

    // TODO Use flatFileStore only if we have relative nodes to be indexed
    FlatFileStore store =
            new FlatFileNodeStoreBuilder(traverser, indexHelper.getWorkDir())
                    .withBlobStore(indexHelper.getGCBlobStore())
                    .withPreferredPathElements(compositeIndexer.getRelativeIndexedNodeNames())
                    .build();
    closer.register(store);

    // Start the reporter afresh for the indexing pass over the flat file store.
    progressReporter.reset();
    boolean entryCountKnown = store.getEntryCount() > 0;
    if (entryCountKnown) {
        // Use the number of dumped entries as the node count estimate for progress reporting.
        progressReporter.setNodeCountEstimator((String path, Set<String> paths) -> store.getEntryCount());
    }
    progressReporter.reindexingTraversalStart("/");

    Stopwatch indexingTimer = Stopwatch.createStarted();
    for (NodeStateEntry nodeEntry : store) {
        reportDocumentRead(nodeEntry.getPath());
        compositeIndexer.index(nodeEntry);
    }
    progressReporter.reindexingTraversalEnd();
    progressReporter.logReport();
    log.info("Completed the indexing in {}", indexingTimer);

    // Merge the accumulated index changes into the in-memory store, then run the post-index work.
    inMemoryStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    indexerSupport.postIndexWork(inMemoryStore);
}
Also used : NodeState(org.apache.jackrabbit.oak.spi.state.NodeState) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) HashSet(java.util.HashSet) Set(java.util.Set) FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) Stopwatch(com.google.common.base.Stopwatch) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) NodeStore(org.apache.jackrabbit.oak.spi.state.NodeStore) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) FlatFileNodeStoreBuilder(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder)

Example 2 with FlatFileStore

use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.

the class DocumentStoreIndexerBase method buildFlatFileStore.

/**
 * Builds a {@link FlatFileStore} for the configured index paths without running the indexers.
 *
 * <p>The index definitions found under {@code indexHelper.getIndexPaths()} decide which paths
 * are dumped (any path not excluded by at least one definition's path filter) and which
 * relative node names are passed on as preferred path elements.
 *
 * @return an instance of FlatFileStore, whose getFlatFileStorePath() method can be used to get
 *         the absolute path to this store
 * @throws IOException if the flat file store could not be built
 * @throws CommitFailedException declared for the index definition handling
 */
public FlatFileStore buildFlatFileStore() throws IOException, CommitFailedException {
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore inMemoryStore = new MemoryNodeStore(checkpointedState);
    NodeBuilder rootBuilder = inMemoryStore.getRoot().builder();
    // Note: the root state is captured before the index definitions are updated below.
    NodeState rootState = rootBuilder.getNodeState();
    indexerSupport.updateIndexDefinitions(rootBuilder);

    IndexDefinition.Builder definitionBuilder = new IndexDefinition.Builder();
    Set<String> preferredPathElements = new HashSet<>();
    Set<IndexDefinition> definitions = new HashSet<>();
    for (String indexPath : indexHelper.getIndexPaths()) {
        NodeBuilder indexNodeBuilder = IndexerSupport.childBuilder(rootBuilder, indexPath, false);
        IndexDefinition definition = definitionBuilder
                .defn(indexNodeBuilder.getNodeState())
                .indexPath(indexPath)
                .root(rootState)
                .build();
        preferredPathElements.addAll(definition.getRelativeNodeNames());
        definitions.add(definition);
    }

    // A path is dumped as soon as one index definition does not explicitly exclude it.
    Predicate<String> notExcludedByAll = path -> {
        for (IndexDefinition definition : definitions) {
            if (definition.getPathFilter().filter(path) != PathFilter.Result.EXCLUDE) {
                return true;
            }
        }
        return false;
    };

    // No CompositeIndexer is available here; the preferred path elements are passed explicitly.
    FlatFileStore store = buildFlatFileStore(checkpointedState, null, notExcludedByAll, preferredPathElements);
    log.info("FlatFileStore built at {}. To use this flatFileStore in a reindex step, set System Property-{} with value {}",
            store.getFlatFileStorePath(), OAK_INDEXER_SORTED_FILE_PATH, store.getFlatFileStorePath());
    return store;
}
Also used : FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) CommitFailedException(org.apache.jackrabbit.oak.api.CommitFailedException) CommitInfo(org.apache.jackrabbit.oak.spi.commit.CommitInfo) Stopwatch(com.google.common.base.Stopwatch) IndexDefinition(org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition) MongoDocumentTraverser(org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentTraverser) IndexUpdateCallback(org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback) LoggerFactory(org.slf4j.LoggerFactory) IndexHelper(org.apache.jackrabbit.oak.index.IndexHelper) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MetricStatisticsProvider(org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider) IndexerSupport(org.apache.jackrabbit.oak.index.IndexerSupport) EmptyHook(org.apache.jackrabbit.oak.spi.commit.EmptyHook) DefaultMemoryManager(org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryManager) Closer(com.google.common.io.Closer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DocumentStoreSplitter(org.apache.jackrabbit.oak.plugins.document.mongo.DocumentStoreSplitter) NodeState(org.apache.jackrabbit.oak.spi.state.NodeState) RevisionVector(org.apache.jackrabbit.oak.plugins.document.RevisionVector) PathFilter(org.apache.jackrabbit.oak.spi.filter.PathFilter) OAK_INDEXER_SORTED_FILE_PATH(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) FlatFileNodeStoreBuilder(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder) NodeTraversalCallback(org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) Predicate(java.util.function.Predicate) 
Preconditions.checkNotNull(com.google.common.base.Preconditions.checkNotNull) MongoConnection(org.apache.jackrabbit.oak.plugins.document.util.MongoConnection) NodeStore(org.apache.jackrabbit.oak.spi.state.NodeStore) Set(java.util.Set) IOException(java.io.IOException) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) File(java.io.File) MongoDocumentStore(org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore) List(java.util.List) Collection(org.apache.jackrabbit.oak.plugins.document.Collection) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) IndexingProgressReporter(org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter) Closeable(java.io.Closeable) MemoryManager(org.apache.jackrabbit.oak.index.indexer.document.flatfile.MemoryManager) IndexConstants(org.apache.jackrabbit.oak.plugins.index.IndexConstants) MetricRateEstimator(org.apache.jackrabbit.oak.plugins.index.progress.MetricRateEstimator) TYPE_PROPERTY_NAME(org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME) NodeStateUtils(org.apache.jackrabbit.oak.spi.state.NodeStateUtils) StatisticsProvider(org.apache.jackrabbit.oak.stats.StatisticsProvider) NodeState(org.apache.jackrabbit.oak.spi.state.NodeState) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) FlatFileNodeStoreBuilder(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) NodeStore(org.apache.jackrabbit.oak.spi.state.NodeStore) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) 
IndexDefinition(org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition) HashSet(java.util.HashSet)

Example 3 with FlatFileStore

use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.

the class DocumentStoreIndexerBase method reindex.

/**
 * Reindexes the configured indexes by building a {@link FlatFileStore} from the checkpointed
 * state and passing each of its entries to the indexers.
 *
 * <p>Index changes are collected in a {@link MemoryNodeStore} seeded with the checkpointed
 * state and merged there after the traversal. If no indexer is applicable the method returns
 * without building the flat file store.
 *
 * @throws CommitFailedException declared for the indexing and merge steps
 * @throws IOException if the flat file store cannot be built, or declared by the indexing steps
 */
public void reindex() throws CommitFailedException, IOException {
    IndexingProgressReporter reporter =
            new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP);
    configureEstimators(reporter);

    // Seed an in-memory store with the checkpointed state and prepare the indexers on it.
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore inMemoryStore = new MemoryNodeStore(checkpointedState);
    indexerSupport.switchIndexLanesAndReindexFlag(inMemoryStore);
    NodeBuilder rootBuilder = inMemoryStore.getRoot().builder();
    CompositeIndexer compositeIndexer = prepareIndexers(inMemoryStore, rootBuilder, reporter);
    if (compositeIndexer.isEmpty()) {
        // Nothing to index, so there is no point in building the flat file store.
        return;
    }
    closer.register(compositeIndexer);

    // Passing null lets the builder fall back to the indexer's relative node names.
    FlatFileStore store = buildFlatFileStore(checkpointedState, compositeIndexer, compositeIndexer::shouldInclude, null);

    reporter.reset();
    boolean entryCountKnown = store.getEntryCount() > 0;
    if (entryCountKnown) {
        // Use the number of entries in the store as the node count estimate for progress reporting.
        reporter.setNodeCountEstimator((String path, Set<String> paths) -> store.getEntryCount());
    }
    reporter.reindexingTraversalStart("/");
    preIndexOpertaions(compositeIndexer.getIndexers());

    Stopwatch indexingTimer = Stopwatch.createStarted();
    for (NodeStateEntry nodeEntry : store) {
        reportDocumentRead(nodeEntry.getPath(), reporter);
        compositeIndexer.index(nodeEntry);
    }
    reporter.reindexingTraversalEnd();
    reporter.logReport();
    log.info("Completed the indexing in {}", indexingTimer);

    // Merge the accumulated index changes into the in-memory store, then run the post-index work.
    inMemoryStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    indexerSupport.postIndexWork(inMemoryStore);
}
Also used : NodeState(org.apache.jackrabbit.oak.spi.state.NodeState) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) NodeStore(org.apache.jackrabbit.oak.spi.state.NodeStore) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) HashSet(java.util.HashSet) Set(java.util.Set) MemoryNodeStore(org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore) FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) Stopwatch(com.google.common.base.Stopwatch) IndexingProgressReporter(org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder)

Example 4 with FlatFileStore

use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.

the class DocumentStoreIndexerBase method buildFlatFileStore.

/**
 * Builds the {@link FlatFileStore} for the given checkpointed state, retrying the download up to
 * {@code MAX_DOWNLOAD_ATTEMPTS} times when a {@link CompositeException} is raised.
 *
 * <p>After a failed attempt the directory used by that attempt is remembered and handed to the
 * next builder as an existing data dump directory. Between attempts the method sleeps, starting
 * at 5000 ms and doubling after every completed wait.
 *
 * @param checkpointedState root state of the checkpoint; must be a {@link DocumentNodeState}
 * @param indexer composite indexer; its relative indexed node names are used when
 *        {@code preferredPathElements} is {@code null}, so it must be non-null in that case
 * @param pathPredicate predicate handed to the builder to select the paths to include
 * @param preferredPathElements preferred path elements, or {@code null} to take them from
 *        {@code indexer}
 * @return the flat file store, already registered with {@code closer}
 * @throws IOException if the store could not be built within the allowed number of attempts;
 *         the last {@link CompositeException} is attached as the cause
 */
private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer, Predicate<String> pathPredicate, Set<String> preferredPathElements) throws IOException {
    Stopwatch flatFileStoreWatch = Stopwatch.createStarted();
    int executionCount = 1;
    CompositeException lastException = null;
    List<File> previousDownloadDirs = new ArrayList<>();
    FlatFileStore flatFileStore = null;
    // TODO How to ensure we can safely read from secondary
    DocumentNodeState rootDocumentState = (DocumentNodeState) checkpointedState;
    DocumentNodeStore nodeStore = (DocumentNodeStore) indexHelper.getNodeStore();
    DocumentStoreSplitter splitter = new DocumentStoreSplitter(getMongoDocumentStore());
    // NOTE(review): presumably splits the NODES collection into 10 ranges by last-modified
    // time - confirm against DocumentStoreSplitter.
    List<Long> lastModifiedBreakPoints = splitter.split(Collection.NODES, 0L, 10);
    int backOffTimeInMillis = 5000;
    MemoryManager memoryManager = new DefaultMemoryManager();
    while (flatFileStore == null && executionCount <= MAX_DOWNLOAD_ATTEMPTS) {
        // Scoped per attempt so the catch block can never see a builder left over from an
        // earlier attempt (or dereference null if the failure happens before assignment).
        FlatFileNodeStoreBuilder builder = null;
        try {
            builder = new FlatFileNodeStoreBuilder(indexHelper.getWorkDir(), memoryManager)
                    .withLastModifiedBreakPoints(lastModifiedBreakPoints)
                    .withBlobStore(indexHelper.getGCBlobStore())
                    .withPreferredPathElements((preferredPathElements != null) ? preferredPathElements : indexer.getRelativeIndexedNodeNames())
                    .addExistingDataDumpDir(indexerSupport.getExistingDataDumpDir())
                    .withPathPredicate(pathPredicate)
                    .withNodeStateEntryTraverserFactory(new MongoNodeStateEntryTraverserFactory(rootDocumentState.getRootRevision(), nodeStore, getMongoDocumentStore(), traversalLog, indexer));
            // Hand the directories of earlier failed attempts to this attempt's builder.
            for (File dir : previousDownloadDirs) {
                builder.addExistingDataDumpDir(dir);
            }
            flatFileStore = builder.build();
            closer.register(flatFileStore);
        } catch (CompositeException e) {
            e.logAllExceptions("Underlying throwable caught during download", log);
            log.info("Could not build flat file store. Execution count {}. Retries left {}. Time elapsed {}", executionCount, MAX_DOWNLOAD_ATTEMPTS - executionCount, flatFileStoreWatch);
            lastException = e;
            if (builder != null) {
                previousDownloadDirs.add(builder.getFlatFileStoreDir());
            }
            if (executionCount < MAX_DOWNLOAD_ATTEMPTS) {
                try {
                    log.info("Waiting for {} millis before retrying", backOffTimeInMillis);
                    Thread.sleep(backOffTimeInMillis);
                    backOffTimeInMillis *= 2;
                } catch (InterruptedException ie) {
                    log.error("Interrupted while waiting before retrying download ", ie);
                    // Restore the interrupt status so callers up the stack can observe it.
                    Thread.currentThread().interrupt();
                }
            }
        }
        executionCount++;
    }
    if (flatFileStore == null) {
        throw new IOException("Could not build flat file store", lastException);
    }
    log.info("Completed the flat file store build in {}", flatFileStoreWatch);
    return flatFileStore;
}
Also used : FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) DefaultMemoryManager(org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryManager) Stopwatch(com.google.common.base.Stopwatch) ArrayList(java.util.ArrayList) DocumentNodeStore(org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore) IOException(java.io.IOException) DocumentStoreSplitter(org.apache.jackrabbit.oak.plugins.document.mongo.DocumentStoreSplitter) DefaultMemoryManager(org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryManager) MemoryManager(org.apache.jackrabbit.oak.index.indexer.document.flatfile.MemoryManager) DocumentNodeState(org.apache.jackrabbit.oak.plugins.document.DocumentNodeState) File(java.io.File) FlatFileNodeStoreBuilder(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder)

Example 5 with FlatFileStore

use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.

the class IndexCommand method reindex.

/**
 * Reindexes the indexes configured in {@code extendedIndexHelper} up to the given checkpoint
 * and copies the resulting index files to the output location.
 *
 * <p>When the command runs against Mongo and document traversal mode is enabled, a
 * {@link DocumentStoreIndexer} is used; optionally the flat file store is built first and its
 * path published through the {@code OAK_INDEXER_SORTED_FILE_PATH} system property. Otherwise
 * an {@code OutOfBandIndexer} performs the reindexing.
 *
 * @param idxOpts index options selecting the traversal mode and flat file store handling
 * @param extendedIndexHelper helper supplying the index paths to reindex
 * @param checkpoint checkpoint to index up to; must not be {@code null}
 * @return the directory the index files were copied to
 * @throws IOException declared for the indexing and file copy steps
 * @throws CommitFailedException declared for the indexing steps
 */
private File reindex(IndexOptions idxOpts, ExtendedIndexHelper extendedIndexHelper, String checkpoint) throws IOException, CommitFailedException {
    checkNotNull(checkpoint, "Checkpoint value is required for reindexing done in read only mode");
    Stopwatch timer = Stopwatch.createStarted();
    IndexerSupport indexerSupport = createIndexerSupport(extendedIndexHelper, checkpoint);
    log.info("Proceeding to index {} upto checkpoint {} {}",
            extendedIndexHelper.getIndexPaths(), checkpoint, indexerSupport.getCheckpointInfo());

    boolean documentTraversal = opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode();
    if (!documentTraversal) {
        try (OutOfBandIndexer outOfBandIndexer = new OutOfBandIndexer(extendedIndexHelper, indexerSupport)) {
            outOfBandIndexer.reindex();
        }
    } else {
        log.info("Using Document order traversal to perform reindexing");
        try (DocumentStoreIndexer documentIndexer = new DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) {
            if (idxOpts.buildFlatFileStoreSeparately()) {
                // Build the store up front and publish its path via the system property
                // before the reindex call below runs.
                System.setProperty(OAK_INDEXER_SORTED_FILE_PATH,
                        documentIndexer.buildFlatFileStore().getFlatFileStorePath());
            }
            documentIndexer.reindex();
        }
    }

    indexerSupport.writeMetaInfo(checkpoint);
    File outputDir = indexerSupport.copyIndexFilesToOutput();
    log.info("Indexing completed for indexes {} in {} ({} ms) and index files are copied to {}",
            extendedIndexHelper.getIndexPaths(), timer, timer.elapsed(TimeUnit.MILLISECONDS), IndexCommand.getPath(outputDir));
    return outputDir;
}
Also used : FlatFileStore(org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore) Stopwatch(com.google.common.base.Stopwatch) DocumentStoreIndexer(org.apache.jackrabbit.oak.index.indexer.document.DocumentStoreIndexer) File(java.io.File)

Aggregations

Stopwatch (com.google.common.base.Stopwatch)6 FlatFileStore (org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore)6 DocumentNodeState (org.apache.jackrabbit.oak.plugins.document.DocumentNodeState)5 DocumentNodeStore (org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore)5 File (java.io.File)4 FlatFileNodeStoreBuilder (org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 Set (java.util.Set)3 DefaultMemoryManager (org.apache.jackrabbit.oak.index.indexer.document.flatfile.DefaultMemoryManager)3 MemoryManager (org.apache.jackrabbit.oak.index.indexer.document.flatfile.MemoryManager)3 DocumentStoreSplitter (org.apache.jackrabbit.oak.plugins.document.mongo.DocumentStoreSplitter)3 MemoryNodeStore (org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore)3 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)3 NodeState (org.apache.jackrabbit.oak.spi.state.NodeState)3 NodeStore (org.apache.jackrabbit.oak.spi.state.NodeStore)3 IndexingProgressReporter (org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter)2 MetricRegistry (com.codahale.metrics.MetricRegistry)1 Preconditions.checkNotNull (com.google.common.base.Preconditions.checkNotNull)1