Use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.
The class DocumentStoreIndexer, method reindex().
public void reindex() throws CommitFailedException, IOException {
    configureEstimators();
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
    indexerSupport.switchIndexLanesAndReindexFlag(copyOnWriteStore);
    NodeBuilder builder = copyOnWriteStore.getRoot().builder();
    CompositeIndexer indexer = prepareIndexers(copyOnWriteStore, builder);
    if (indexer.isEmpty()) {
        return;
    }
    closer.register(indexer);
    // TODO How to ensure we can safely read from secondary
    DocumentNodeState rootDocumentState = (DocumentNodeState) checkpointedState;
    DocumentNodeStore nodeStore = (DocumentNodeStore) indexHelper.getNodeStore();
    NodeStateEntryTraverser nsep = new NodeStateEntryTraverser(rootDocumentState.getRootRevision(), nodeStore, getMongoDocumentStore())
            .withProgressCallback(this::reportDocumentRead)
            .withPathPredicate(indexer::shouldInclude);
    closer.register(nsep);
    // As the first traversal is for dumping, change the message prefix
    progressReporter.setMessagePrefix("Dumping");
    // TODO Use flatFileStore only if we have relative nodes to be indexed
    FlatFileStore flatFileStore = new FlatFileNodeStoreBuilder(nsep, indexHelper.getWorkDir())
            .withBlobStore(indexHelper.getGCBlobStore())
            .withPreferredPathElements(indexer.getRelativeIndexedNodeNames())
            .build();
    closer.register(flatFileStore);
    progressReporter.reset();
    if (flatFileStore.getEntryCount() > 0) {
        progressReporter.setNodeCountEstimator((String basePath, Set<String> indexPaths) -> flatFileStore.getEntryCount());
    }
    progressReporter.reindexingTraversalStart("/");
    Stopwatch indexerWatch = Stopwatch.createStarted();
    for (NodeStateEntry entry : flatFileStore) {
        reportDocumentRead(entry.getPath());
        indexer.index(entry);
    }
    progressReporter.reindexingTraversalEnd();
    progressReporter.logReport();
    log.info("Completed the indexing in {}", indexerWatch);
    copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    indexerSupport.postIndexWork(copyOnWriteStore);
}
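The loop at the end of this method shows the standard way to consume a FlatFileStore: it is iterable over NodeStateEntry objects in the order they were dumped. A minimal sketch of a stand-alone consumer follows; the class name, the helper method, and the jcr:primaryType read are illustrative only and not part of the Oak API.

import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;
import org.apache.jackrabbit.oak.spi.state.NodeState;

public class FlatFileStoreConsumerSketch {

    // Hypothetical consumer: walks every entry of an already built FlatFileStore
    // and prints the path and jcr:primaryType of each node.
    static void dumpPrimaryTypes(FlatFileStore flatFileStore) {
        for (NodeStateEntry entry : flatFileStore) {
            NodeState state = entry.getNodeState();
            String primaryType = state.getString("jcr:primaryType");
            System.out.println(entry.getPath() + " -> " + primaryType);
        }
    }
}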
Use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.
The class DocumentStoreIndexerBase, method buildFlatFileStore().
/**
 * @return an instance of FlatFileStore; its getFlatFileStorePath() method can be used to get the absolute path to this store.
 * @throws IOException
 * @throws CommitFailedException
 */
public FlatFileStore buildFlatFileStore() throws IOException, CommitFailedException {
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
    NodeBuilder builder = copyOnWriteStore.getRoot().builder();
    NodeState root = builder.getNodeState();
    indexerSupport.updateIndexDefinitions(builder);
    IndexDefinition.Builder indexDefBuilder = new IndexDefinition.Builder();
    Set<String> preferredPathElements = new HashSet<>();
    Set<IndexDefinition> indexDefinitions = new HashSet<>();
    for (String indexPath : indexHelper.getIndexPaths()) {
        NodeBuilder idxBuilder = IndexerSupport.childBuilder(builder, indexPath, false);
        IndexDefinition indexDf = indexDefBuilder.defn(idxBuilder.getNodeState())
                .indexPath(indexPath)
                .root(root)
                .build();
        preferredPathElements.addAll(indexDf.getRelativeNodeNames());
        indexDefinitions.add(indexDf);
    }
    Predicate<String> predicate = s -> indexDefinitions.stream()
            .anyMatch(indexDef -> indexDef.getPathFilter().filter(s) != PathFilter.Result.EXCLUDE);
    FlatFileStore flatFileStore = buildFlatFileStore(checkpointedState, null, predicate, preferredPathElements);
    log.info("FlatFileStore built at {}. To use this flatFileStore in a reindex step, set System Property-{} with value {}",
            flatFileStore.getFlatFileStorePath(), OAK_INDEXER_SORTED_FILE_PATH, flatFileStore.getFlatFileStorePath());
    return flatFileStore;
}
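The log message above hints at the intended hand-off: the path returned by getFlatFileStorePath() can be recorded in the OAK_INDEXER_SORTED_FILE_PATH system property so that a later reindex run reuses the already sorted dump instead of downloading it again (IndexCommand below does exactly this). A minimal caller-side sketch follows; the class name and helper method are illustrative, and it assumes the OAK_INDEXER_SORTED_FILE_PATH constant referenced above is available via a static import.

import java.io.IOException;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore;

public class FlatFileStoreHandOffSketch {

    // Sketch only: builds the flat file store once and records its location for a subsequent reindex run.
    static String buildStoreAndRecordPath(DocumentStoreIndexer indexer) throws IOException, CommitFailedException {
        FlatFileStore ffs = indexer.buildFlatFileStore();
        String path = ffs.getFlatFileStorePath();
        // The next reindex invocation can pick up the sorted dump from this property instead of re-downloading.
        System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, path);
        return path;
    }
}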
Use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.
The class DocumentStoreIndexerBase, method reindex().
public void reindex() throws CommitFailedException, IOException {
    IndexingProgressReporter progressReporter = new IndexingProgressReporter(IndexUpdateCallback.NOOP, NodeTraversalCallback.NOOP);
    configureEstimators(progressReporter);
    NodeState checkpointedState = indexerSupport.retrieveNodeStateForCheckpoint();
    NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
    indexerSupport.switchIndexLanesAndReindexFlag(copyOnWriteStore);
    NodeBuilder builder = copyOnWriteStore.getRoot().builder();
    CompositeIndexer indexer = prepareIndexers(copyOnWriteStore, builder, progressReporter);
    if (indexer.isEmpty()) {
        return;
    }
    closer.register(indexer);
    FlatFileStore flatFileStore = buildFlatFileStore(checkpointedState, indexer, indexer::shouldInclude, null);
    progressReporter.reset();
    if (flatFileStore.getEntryCount() > 0) {
        FlatFileStore finalFlatFileStore = flatFileStore;
        progressReporter.setNodeCountEstimator((String basePath, Set<String> indexPaths) -> finalFlatFileStore.getEntryCount());
    }
    progressReporter.reindexingTraversalStart("/");
    preIndexOpertaions(indexer.getIndexers());
    Stopwatch indexerWatch = Stopwatch.createStarted();
    for (NodeStateEntry entry : flatFileStore) {
        reportDocumentRead(entry.getPath(), progressReporter);
        indexer.index(entry);
    }
    progressReporter.reindexingTraversalEnd();
    progressReporter.logReport();
    log.info("Completed the indexing in {}", indexerWatch);
    copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
    indexerSupport.postIndexWork(copyOnWriteStore);
}
Use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.
The class DocumentStoreIndexerBase, method buildFlatFileStore() (private variant with retries).
private FlatFileStore buildFlatFileStore(NodeState checkpointedState, CompositeIndexer indexer,
        Predicate<String> pathPredicate, Set<String> preferredPathElements) throws IOException {
    Stopwatch flatFileStoreWatch = Stopwatch.createStarted();
    int executionCount = 1;
    CompositeException lastException = null;
    List<File> previousDownloadDirs = new ArrayList<>();
    FlatFileStore flatFileStore = null;
    // TODO How to ensure we can safely read from secondary
    DocumentNodeState rootDocumentState = (DocumentNodeState) checkpointedState;
    DocumentNodeStore nodeStore = (DocumentNodeStore) indexHelper.getNodeStore();
    DocumentStoreSplitter splitter = new DocumentStoreSplitter(getMongoDocumentStore());
    List<Long> lastModifiedBreakPoints = splitter.split(Collection.NODES, 0L, 10);
    FlatFileNodeStoreBuilder builder = null;
    int backOffTimeInMillis = 5000;
    MemoryManager memoryManager = new DefaultMemoryManager();
    while (flatFileStore == null && executionCount <= MAX_DOWNLOAD_ATTEMPTS) {
        try {
            builder = new FlatFileNodeStoreBuilder(indexHelper.getWorkDir(), memoryManager)
                    .withLastModifiedBreakPoints(lastModifiedBreakPoints)
                    .withBlobStore(indexHelper.getGCBlobStore())
                    .withPreferredPathElements((preferredPathElements != null) ? preferredPathElements : indexer.getRelativeIndexedNodeNames())
                    .addExistingDataDumpDir(indexerSupport.getExistingDataDumpDir())
                    .withPathPredicate(pathPredicate)
                    .withNodeStateEntryTraverserFactory(new MongoNodeStateEntryTraverserFactory(rootDocumentState.getRootRevision(),
                            nodeStore, getMongoDocumentStore(), traversalLog, indexer));
            for (File dir : previousDownloadDirs) {
                builder.addExistingDataDumpDir(dir);
            }
            flatFileStore = builder.build();
            closer.register(flatFileStore);
        } catch (CompositeException e) {
            e.logAllExceptions("Underlying throwable caught during download", log);
            log.info("Could not build flat file store. Execution count {}. Retries left {}. Time elapsed {}",
                    executionCount, MAX_DOWNLOAD_ATTEMPTS - executionCount, flatFileStoreWatch);
            lastException = e;
            previousDownloadDirs.add(builder.getFlatFileStoreDir());
            if (executionCount < MAX_DOWNLOAD_ATTEMPTS) {
                try {
                    log.info("Waiting for {} millis before retrying", backOffTimeInMillis);
                    Thread.sleep(backOffTimeInMillis);
                    backOffTimeInMillis *= 2;
                } catch (InterruptedException ie) {
                    log.error("Interrupted while waiting before retrying download ", ie);
                }
            }
        }
        executionCount++;
    }
    if (flatFileStore == null) {
        throw new IOException("Could not build flat file store", lastException);
    }
    log.info("Completed the flat file store build in {}", flatFileStoreWatch);
    return flatFileStore;
}
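Stripped of the Oak-specific builder wiring, the method above is a bounded retry loop with exponential back-off; on each failure it also remembers the partially downloaded dump directory (builder.getFlatFileStoreDir()) so the next attempt can resume from it via addExistingDataDumpDir(). A simplified, self-contained sketch of just that retry control flow follows; the partial-dump reuse is elided, and buildOnce is a hypothetical stand-in for the FlatFileNodeStoreBuilder call.

import java.io.IOException;
import java.util.concurrent.Callable;

public class RetryWithBackoffSketch {

    static final int MAX_DOWNLOAD_ATTEMPTS = 3;

    // Runs the given build step up to MAX_DOWNLOAD_ATTEMPTS times, doubling the wait between attempts,
    // and rethrows the last failure as an IOException if every attempt fails.
    static <T> T buildWithRetries(Callable<T> buildOnce) throws IOException {
        long backOffMillis = 5000;
        Exception lastException = null;
        for (int executionCount = 1; executionCount <= MAX_DOWNLOAD_ATTEMPTS; executionCount++) {
            try {
                return buildOnce.call();
            } catch (Exception e) {
                lastException = e;
                if (executionCount < MAX_DOWNLOAD_ATTEMPTS) {
                    try {
                        Thread.sleep(backOffMillis);
                        backOffMillis *= 2; // exponential back-off, mirroring backOffTimeInMillis above
                    } catch (InterruptedException ie) {
                        Thread.currentThread().interrupt();
                    }
                }
            }
        }
        throw new IOException("Could not build flat file store", lastException);
    }
}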
Use of org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileStore in project jackrabbit-oak by apache.
The class IndexCommand, method reindex().
private File reindex(IndexOptions idxOpts, ExtendedIndexHelper extendedIndexHelper, String checkpoint) throws IOException, CommitFailedException {
    checkNotNull(checkpoint, "Checkpoint value is required for reindexing done in read only mode");
    Stopwatch w = Stopwatch.createStarted();
    IndexerSupport indexerSupport = createIndexerSupport(extendedIndexHelper, checkpoint);
    log.info("Proceeding to index {} upto checkpoint {} {}", extendedIndexHelper.getIndexPaths(), checkpoint, indexerSupport.getCheckpointInfo());
    if (opts.getCommonOpts().isMongo() && idxOpts.isDocTraversalMode()) {
        log.info("Using Document order traversal to perform reindexing");
        try (DocumentStoreIndexer indexer = new DocumentStoreIndexer(extendedIndexHelper, indexerSupport)) {
            if (idxOpts.buildFlatFileStoreSeparately()) {
                FlatFileStore ffs = indexer.buildFlatFileStore();
                String pathToFFS = ffs.getFlatFileStorePath();
                System.setProperty(OAK_INDEXER_SORTED_FILE_PATH, pathToFFS);
            }
            indexer.reindex();
        }
    } else {
        try (OutOfBandIndexer indexer = new OutOfBandIndexer(extendedIndexHelper, indexerSupport)) {
            indexer.reindex();
        }
    }
    indexerSupport.writeMetaInfo(checkpoint);
    File destDir = indexerSupport.copyIndexFilesToOutput();
    log.info("Indexing completed for indexes {} in {} ({} ms) and index files are copied to {}",
            extendedIndexHelper.getIndexPaths(), w, w.elapsed(TimeUnit.MILLISECONDS), IndexCommand.getPath(destDir));
    return destDir;
}