use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
the class Segment method readReferencedSegments.
private SegmentReferences readReferencedSegments(final SegmentIdProvider idProvider) {
checkState(getReferencedSegmentIdCount() + 1 < 0xffff, "Segment cannot have more than 0xffff references");
final int referencedSegmentIdCount = getReferencedSegmentIdCount();
// We need to keep SegmentId references (as opposed to e.g. UUIDs)
// here as frequently resolving the segment ids via the segment id
// tables is prohibitively expensive.
// These SegmentId references are not a problem wrt. heap usage as
// their individual memoised references to their underlying segment
// is managed via the SegmentCache. It is the size of that cache that
// keeps overall heap usage by Segment instances bounded.
// See OAK-6106.
final SegmentId[] refIds = new SegmentId[referencedSegmentIdCount];
return new SegmentReferences() {
@Override
public SegmentId getSegmentId(int reference) {
checkArgument(reference <= referencedSegmentIdCount, "Segment reference out of bounds");
SegmentId id = refIds[reference - 1];
if (id == null) {
synchronized (refIds) {
id = refIds[reference - 1];
if (id == null) {
long msb = data.getSegmentReferenceMsb(reference - 1);
long lsb = data.getSegmentReferenceLsb(reference - 1);
id = idProvider.newSegmentId(msb, lsb);
refIds[reference - 1] = id;
}
}
}
return id;
}
@Nonnull
@Override
public Iterator<SegmentId> iterator() {
return new AbstractIterator<SegmentId>() {
private int reference = 1;
@Override
protected SegmentId computeNext() {
if (reference <= referencedSegmentIdCount) {
return getSegmentId(reference++);
} else {
return endOfData();
}
}
};
}
};
}
use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
the class Utils method internalGetSelectedDocuments.
private static Iterable<NodeDocument> internalGetSelectedDocuments(final DocumentStore store, final String indexedProperty, final long startValue, final int batchSize) {
if (batchSize < 2) {
throw new IllegalArgumentException("batchSize must be > 1");
}
return new Iterable<NodeDocument>() {
@Override
public Iterator<NodeDocument> iterator() {
return new AbstractIterator<NodeDocument>() {
private String startId = NodeDocument.MIN_ID_VALUE;
private Iterator<NodeDocument> batch = nextBatch();
@Override
protected NodeDocument computeNext() {
// read next batch if necessary
if (!batch.hasNext()) {
batch = nextBatch();
}
NodeDocument doc;
if (batch.hasNext()) {
doc = batch.next();
// remember current id
startId = doc.getId();
} else {
doc = endOfData();
}
return doc;
}
private Iterator<NodeDocument> nextBatch() {
List<NodeDocument> result = indexedProperty == null ? store.query(Collection.NODES, startId, NodeDocument.MAX_ID_VALUE, batchSize) : store.query(Collection.NODES, startId, NodeDocument.MAX_ID_VALUE, indexedProperty, startValue, batchSize);
return result.iterator();
}
};
}
};
}
use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
the class VersionGarbageCollectorIT method cancelGCAfterFirstPhase.
@Test
public void cancelGCAfterFirstPhase() throws Exception {
createTestNode("foo");
NodeBuilder builder = store.getRoot().builder();
builder.child("foo").child("bar");
merge(store, builder);
builder = store.getRoot().builder();
builder.child("foo").remove();
merge(store, builder);
store.runBackgroundOperations();
clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1));
final AtomicReference<VersionGarbageCollector> gcRef = Atomics.newReference();
VersionGCSupport gcSupport = new VersionGCSupport(store.getDocumentStore()) {
@Override
public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
return new Iterable<NodeDocument>() {
@Nonnull
@Override
public Iterator<NodeDocument> iterator() {
return new AbstractIterator<NodeDocument>() {
private Iterator<NodeDocument> it = candidates(fromModified, toModified);
@Override
protected NodeDocument computeNext() {
if (it.hasNext()) {
return it.next();
}
// cancel when we reach the end
gcRef.get().cancel();
return endOfData();
}
};
}
};
}
private Iterator<NodeDocument> candidates(long prevLastModifiedTime, long lastModifiedTime) {
return super.getPossiblyDeletedDocs(prevLastModifiedTime, lastModifiedTime).iterator();
}
};
gcRef.set(new VersionGarbageCollector(store, gcSupport));
VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES);
assertTrue(stats.canceled);
assertEquals(0, stats.deletedDocGCCount);
assertEquals(0, stats.deletedLeafDocGCCount);
assertEquals(0, stats.intermediateSplitDocGCCount);
assertEquals(0, stats.splitDocGCCount);
}
use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
the class LuceneIndex method query.
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
final Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
final Set<String> relPaths = getRelativePaths(ft);
if (relPaths.size() > 1) {
return new MultiLuceneIndex(filter, rootState, relPaths).query();
}
final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
// we only restrict non-full-text conditions if there is
// no relative property in the full-text constraint
final boolean nonFullTextConstraints = parent.isEmpty();
final int parentDepth = getDepth(parent);
QueryLimits settings = filter.getQueryLimits();
Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
private final Set<String> seenPaths = Sets.newHashSet();
private ScoreDoc lastDoc;
private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
private boolean noDocs = false;
private long lastSearchIndexerVersion;
private int reloadCount;
@Override
protected LuceneResultRow computeNext() {
while (!queue.isEmpty() || loadDocs()) {
return queue.remove();
}
return endOfData();
}
private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
IndexReader reader = searcher.getIndexReader();
PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
reader.document(doc.doc, visitor);
String path = visitor.getPath();
if (path != null) {
if ("".equals(path)) {
path = "/";
}
if (!parent.isEmpty()) {
// TODO OAK-828 this breaks node aggregation
// get the base path
// ensure the path ends with the given
// relative path
// if (!path.endsWith("/" + parent)) {
// continue;
// }
path = getAncestorPath(path, parentDepth);
// avoid duplicate entries
if (seenPaths.contains(path)) {
return null;
}
seenPaths.add(path);
}
return new LuceneResultRow(path, doc.score, excerpt);
}
return null;
}
/**
* Loads the lucene documents in batches
* @return true if any document is loaded
*/
private boolean loadDocs() {
if (noDocs) {
return false;
}
ScoreDoc lastDocToRecord = null;
IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
TopDocs docs;
long time = System.currentTimeMillis();
checkForIndexVersionChange(searcher);
while (true) {
if (lastDoc != null) {
LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
} else {
LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
docs = searcher.search(query, nextBatchSize);
}
time = System.currentTimeMillis() - time;
LOG.debug("... took {} ms", time);
nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.REP_EXCERPT);
boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
if (addExcerpt) {
// setup highlighter
QueryScorer scorer = new QueryScorer(query);
scorer.setExpandMultiTermQuery(true);
highlighter.setFragmentScorer(scorer);
}
for (ScoreDoc doc : docs.scoreDocs) {
String excerpt = null;
if (addExcerpt) {
excerpt = getExcerpt(analyzer, searcher, doc);
}
LuceneResultRow row = convertToRow(doc, searcher, excerpt);
if (row != null) {
queue.add(row);
}
lastDocToRecord = doc;
}
if (queue.isEmpty() && docs.scoreDocs.length > 0) {
lastDoc = lastDocToRecord;
} else {
break;
}
}
} else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
noDocs = true;
SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
// ACL filter spellchecks
Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length);
QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().getAnalyzer());
for (SuggestWord suggestion : suggestWords) {
Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string);
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
suggestedWords.add(suggestion.string);
break;
}
}
}
}
queue.add(new LuceneResultRow(suggestedWords));
} else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
noDocs = true;
List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
// ACL filter suggestions
Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size());
QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT, indexNode.getDefinition().getAnalyzer());
for (Lookup.LookupResult suggestion : lookupResults) {
Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString());
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) {
suggestedWords.add("{term=" + suggestion.key + ",weight=" + suggestion.value + "}");
break;
}
}
}
}
queue.add(new LuceneResultRow(suggestedWords));
}
} catch (IOException e) {
LOG.warn("query via {} failed.", LuceneIndex.this, e);
} finally {
indexNode.release();
}
if (lastDocToRecord != null) {
this.lastDoc = lastDocToRecord;
}
return !queue.isEmpty();
}
private void checkForIndexVersionChange(IndexSearcher searcher) {
long currentVersion = LucenePropertyIndex.getVersion(searcher);
if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
reloadCount++;
if (reloadCount > MAX_RELOAD_COUNT) {
LOG.error("More than {} index version changes detected for query {}", MAX_RELOAD_COUNT, plan);
throw new IllegalStateException("Too many version changes");
}
lastDoc = null;
LOG.debug("Change in index version detected {} => {}. Query would be performed without " + "offset; reload {}", currentVersion, lastSearchIndexerVersion, reloadCount);
}
this.lastSearchIndexerVersion = currentVersion;
}
};
SizeEstimator sizeEstimator = new SizeEstimator() {
@Override
public long getSize() {
IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH));
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
TotalHitCountCollector collector = new TotalHitCountCollector();
searcher.search(query, collector);
int totalHits = collector.getTotalHits();
LOG.debug("Estimated size for query {} is {}", query, totalHits);
return totalHits;
}
LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
} catch (IOException e) {
LOG.warn("query via {} failed.", LuceneIndex.this, e);
} finally {
indexNode.release();
}
return -1;
}
};
return new LucenePathCursor(itr, settings, sizeEstimator, filter);
}
use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
the class LucenePropertyIndex method query.
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
final Filter filter = plan.getFilter();
final Sort sort = getSort(plan);
final PlanResult pr = getPlanResult(plan);
QueryLimits settings = filter.getQueryLimits();
Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
private final Set<String> seenPaths = Sets.newHashSet();
private ScoreDoc lastDoc;
private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
private boolean noDocs = false;
private IndexSearcher indexSearcher;
private int indexNodeId = -1;
@Override
protected LuceneResultRow computeNext() {
while (!queue.isEmpty() || loadDocs()) {
return queue.remove();
}
releaseSearcher();
return endOfData();
}
private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, Map<String, String> excerpts, Facets facets, String explanation) throws IOException {
IndexReader reader = searcher.getIndexReader();
// TODO Look into usage of field cache for retrieving the path
// instead of reading via reader if no of docs in index are limited
PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
reader.document(doc.doc, visitor);
String path = visitor.getPath();
if (path != null) {
if ("".equals(path)) {
path = "/";
}
if (pr.isPathTransformed()) {
String originalPath = path;
path = pr.transformPath(path);
if (path == null) {
LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
return null;
}
// avoid duplicate entries
if (seenPaths.contains(path)) {
LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
return null;
}
seenPaths.add(path);
}
boolean shouldIncludeForHierarchy = shouldInclude(path, plan);
LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy);
return shouldIncludeForHierarchy ? new LuceneResultRow(path, doc.score, excerpts, facets, explanation) : null;
}
return null;
}
/**
* Loads the lucene documents in batches
* @return true if any document is loaded
*/
private boolean loadDocs() {
if (noDocs) {
return false;
}
ScoreDoc lastDocToRecord = null;
final IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = getCurrentSearcher(indexNode);
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
if (customScoreQuery != null) {
query = customScoreQuery;
}
TopDocs docs;
long start = PERF_LOGGER.start();
while (true) {
if (lastDoc != null) {
LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
if (sort == null) {
docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
} else {
docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
}
} else {
LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
if (sort == null) {
docs = searcher.search(query, nextBatchSize);
} else {
docs = searcher.search(query, nextBatchSize, sort);
}
}
PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
long f = PERF_LOGGER.start();
Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets());
PERF_LOGGER.end(f, -1, "facets retrieved");
Set<String> excerptFields = Sets.newHashSet();
for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
if (QueryConstants.REP_EXCERPT.equals(pr.propertyName)) {
String value = pr.first.getValue(Type.STRING);
excerptFields.add(value);
}
}
boolean addExcerpt = excerptFields.size() > 0;
PropertyRestriction restriction = filter.getPropertyRestriction(QueryConstants.OAK_SCORE_EXPLANATION);
boolean addExplain = restriction != null && restriction.isNotNullRestriction();
Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
FieldInfos mergedFieldInfos = null;
if (addExcerpt) {
// setup highlighter
QueryScorer scorer = new QueryScorer(query);
scorer.setExpandMultiTermQuery(true);
highlighter.setFragmentScorer(scorer);
mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
}
for (ScoreDoc doc : docs.scoreDocs) {
Map<String, String> excerpts = null;
if (addExcerpt) {
excerpts = getExcerpt(query, excerptFields, analyzer, searcher, doc, mergedFieldInfos);
}
String explanation = null;
if (addExplain) {
explanation = searcher.explain(query, doc.doc).toString();
}
LuceneResultRow row = convertToRow(doc, searcher, excerpts, facets, explanation);
if (row != null) {
queue.add(row);
}
lastDocToRecord = doc;
}
if (queue.isEmpty() && docs.scoreDocs.length > 0) {
// queue is still empty but more results can be fetched
// from Lucene so still continue
lastDoc = lastDocToRecord;
} else {
break;
}
}
} else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
noDocs = true;
SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
// ACL filter spellchecks
QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
for (SuggestWord suggestion : suggestWords) {
Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
query = addDescendantClauseIfRequired(query, plan);
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
String prefix = filter.getPath();
if (prefix.length() == 1) {
prefix = "";
}
if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
queue.add(new LuceneResultRow(suggestion.string));
break;
}
}
}
}
} else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
noDocs = true;
List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer());
// ACL filter suggestions
for (Lookup.LookupResult suggestion : lookupResults) {
Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
query = addDescendantClauseIfRequired(query, plan);
TopDocs topDocs = searcher.search(query, 100);
if (topDocs.totalHits > 0) {
for (ScoreDoc doc : topDocs.scoreDocs) {
Document retrievedDoc = searcher.doc(doc.doc);
String prefix = filter.getPath();
if (prefix.length() == 1) {
prefix = "";
}
if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
break;
}
}
}
}
}
} catch (Exception e) {
LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
} finally {
indexNode.release();
}
if (lastDocToRecord != null) {
this.lastDoc = lastDocToRecord;
}
return !queue.isEmpty();
}
private IndexSearcher getCurrentSearcher(IndexNode indexNode) {
// the searcher would be refreshed as done earlier
if (indexNodeId != indexNode.getIndexNodeId()) {
// if already initialized then log about change
if (indexNodeId > 0) {
LOG.debug("Change in index version detected. Query would be performed without offset");
}
indexSearcher = indexNode.getSearcher();
indexNodeId = indexNode.getIndexNodeId();
lastDoc = null;
}
return indexSearcher;
}
private void releaseSearcher() {
// For now nullifying it.
indexSearcher = null;
}
};
SizeEstimator sizeEstimator = new SizeEstimator() {
@Override
public long getSize() {
IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
Query query = (Query) luceneRequestFacade.getLuceneRequest();
TotalHitCountCollector collector = new TotalHitCountCollector();
searcher.search(query, collector);
int totalHits = collector.getTotalHits();
LOG.debug("Estimated size for query {} is {}", query, totalHits);
return totalHits;
}
LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
} catch (IOException e) {
LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
} finally {
indexNode.release();
}
return -1;
}
};
if (pr.hasPropertyIndexResult() || pr.evaluateSyncNodeTypeRestriction()) {
itr = mergePropertyIndexResult(plan, rootState, itr);
}
return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
Aggregations