Use of com.google.common.collect.AbstractIterator in project grakn by graknlabs.
Class QueryParserImpl, method parseList.
/**
 * @param reader a reader representing several queries
 * @return a list of queries
 */
@Override
public <T extends Query<?>> Stream<T> parseList(Reader reader) {
    UnbufferedCharStream charStream = new UnbufferedCharStream(reader);
    GraqlErrorListener errorListener = GraqlErrorListener.withoutQueryString();
    GraqlLexer lexer = createLexer(charStream, errorListener);
    /*
        We tell the lexer to copy the text into each generated token.
        Normally, when calling `Token#getText`, it will look into the underlying `TokenStream` and call
        `TokenStream#size` to check it is in-bounds. However, `UnbufferedTokenStream#size` is not supported
        (because then it would have to read the entire input). To avoid this issue, we set this flag, which
        copies the text into each `Token` so that `Token#getText` just looks up the copied text field.
    */
    lexer.setTokenFactory(new CommonTokenFactory(true));
    // Use an unbuffered token stream so we can handle extremely large input strings
    UnbufferedTokenStream tokenStream = new UnbufferedTokenStream(ChannelTokenSource.of(lexer));
    GraqlParser parser = createParser(tokenStream, errorListener);
    /*
        The "bail" error strategy prevents us reading all the way to the end of the input, e.g.

        ```
        match $x isa person; insert $x has name "Bob"; match $x isa movie; get;
                                                       ^
        ```

        In this example, when ANTLR reaches the indicated `match`, it considers two possibilities:

        1. this is the end of the query
        2. the user has made a mistake. Maybe they accidentally pasted the `match` here.

        Because of case 2, ANTLR will parse beyond the `match` in order to produce a more helpful error
        message. This causes memory issues for very large queries, so we use the simpler "bail" strategy,
        which stops immediately when it hits `match`.
    */
    parser.setErrorHandler(new BailErrorStrategy());
    // This is a lazy iterator that will only consume a single query at a time, without parsing any further.
    // This means it can parse arbitrarily long streams of queries in constant memory!
    Iterable<T> queryIterator = () -> new AbstractIterator<T>() {
        @Nullable
        @Override
        protected T computeNext() {
            int latestToken = tokenStream.LA(1);
            if (latestToken == Token.EOF) {
                endOfData();
                return null;
            } else {
                // When we next run it, it will start where it left off in the stream
                return (T) QUERY.parse(parser, errorListener);
            }
        }
    };
    return StreamSupport.stream(queryIterator.spliterator(), false);
}
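For readers unfamiliar with the Guava class itself: `AbstractIterator` asks subclasses to implement `computeNext()`, which either returns the next element or calls `endOfData()` once the source is exhausted. Below is a minimal, self-contained sketch of the same lazy-iterator-to-Stream shape used by `parseList` above; the `LazyChunks` class and its queue-backed source are hypothetical and not part of grakn.

import com.google.common.collect.AbstractIterator;

import java.util.ArrayDeque;
import java.util.List;
import java.util.Queue;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

class LazyChunks {

    // Wrap a queue in a lazy Iterable: each call to computeNext() consumes one element,
    // and endOfData() is called once the queue is empty.
    static Stream<String> chunks(Queue<String> source) {
        Iterable<String> iterable = () -> new AbstractIterator<String>() {
            @Override
            protected String computeNext() {
                String next = source.poll();
                return next == null ? endOfData() : next;
            }
        };
        // false => a sequential Stream, as in parseList above
        return StreamSupport.stream(iterable.spliterator(), false);
    }

    public static void main(String[] args) {
        Queue<String> queue = new ArrayDeque<>(List.of("a", "b", "c"));
        chunks(queue).forEach(System.out::println); // prints a, b, c, pulling one element at a time
    }
}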
Use of com.google.common.collect.AbstractIterator in project elephant-bird by twitter.
Class LuceneIndexCollectAllRecordReader, method search.
/**
 * Applies {@link #docToValue(Document)} to every document
 * found by executing query over searcher
 *
 * @param searcher the index searcher to query
 * @param query the query to run
 * @return a list of values to be emitted as records (one by one) by this record reader
 * @throws IOException
 */
@Override
protected Iterator<T> search(final IndexSearcher searcher, final Query query) throws IOException {
    // grow the bit set if needed
    docIds.set(searcher.getIndexReader().maxDoc());
    // clear it
    docIds.clear();
    searcher.search(query, new Collector() {

        private int docBase;

        @Override
        public void setScorer(Scorer scorer) {
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        @Override
        public void collect(int doc) {
            docIds.set(doc + docBase);
        }

        @Override
        public void setNextReader(AtomicReaderContext context) {
            this.docBase = context.docBase;
        }
    });
    return new AbstractIterator<T>() {

        private int doc = docIds.nextSetBit(0);

        @Override
        protected T computeNext() {
            // casting to avoid Hadoop 2 incompatibility
            ((Progressable) context).progress();
            if (doc < 0) {
                return endOfData();
            }
            try {
                T ret = docToValue(searcher.doc(doc));
                doc = docIds.nextSetBit(doc + 1);
                return ret;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
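The record reader above marks matching doc ids in a bit set and then walks the set bits lazily. Here is a standalone sketch of that "walk the set bits" idea, using `java.util.BitSet` rather than the Lucene bit set type elephant-bird actually uses, so it is illustrative only.

import com.google.common.collect.AbstractIterator;

import java.util.BitSet;
import java.util.Iterator;

class SetBitIterator {

    // Lazily yields the index of each set bit; nextSetBit returns -1 when there are no more.
    static Iterator<Integer> setBits(BitSet bits) {
        return new AbstractIterator<Integer>() {
            private int next = bits.nextSetBit(0);

            @Override
            protected Integer computeNext() {
                if (next < 0) {
                    return endOfData();
                }
                int current = next;
                next = bits.nextSetBit(current + 1);
                return current;
            }
        };
    }

    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(3);
        bits.set(7);
        setBits(bits).forEachRemaining(System.out::println); // prints 3, then 7
    }
}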
Use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
Class MongoBlobStore, method getAllChunkIds.
@Override
public Iterator<String> getAllChunkIds(long maxLastModifiedTime) throws Exception {
    Bson fields = new BasicDBObject(MongoBlob.KEY_ID, 1);
    Bson hint = new BasicDBObject("$hint", fields);
    Bson query = new Document();
    if (maxLastModifiedTime != 0 && maxLastModifiedTime != -1) {
        query = Filters.lte(MongoBlob.KEY_LAST_MOD, maxLastModifiedTime);
    }
    final MongoCursor<MongoBlob> cur =
            getBlobCollection().find(query).projection(fields).modifiers(hint).iterator();
    // TODO The cursor needs to be closed
    return new AbstractIterator<String>() {
        @Override
        protected String computeNext() {
            if (cur.hasNext()) {
                MongoBlob blob = cur.next();
                if (blob != null) {
                    return blob.getId();
                }
            }
            return endOfData();
        }
    };
}
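Regarding the TODO above: one possible way to release the cursor (a sketch, not the change made in jackrabbit-oak) is to close it as soon as it reports no more elements, since the driver's `MongoCursor` is `Closeable`.

import com.google.common.collect.AbstractIterator;
import com.mongodb.client.MongoCursor;

import java.util.Iterator;

class ClosingIds {

    // Drain the cursor lazily and close it once it is exhausted; illustrative only.
    static Iterator<String> ids(MongoCursor<String> cur) {
        return new AbstractIterator<String>() {
            @Override
            protected String computeNext() {
                if (cur.hasNext()) {
                    return cur.next();
                }
                cur.close(); // release server-side resources once drained
                return endOfData();
            }
        };
    }
}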
Use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.
Class SolrQueryIndex, method getIterator.
private AbstractIterator<SolrResultRow> getIterator(final Filter filter, final IndexPlan plan,
        final String parent, final int parentDepth, final OakSolrConfiguration configuration,
        final SolrClient solrServer, final LMSEstimator estimator) {
    return new AbstractIterator<SolrResultRow>() {

        public Collection<FacetField> facetFields = new LinkedList<FacetField>();
        private final Set<String> seenPaths = Sets.newHashSet();
        private final Deque<SolrResultRow> queue = Queues.newArrayDeque();
        private int offset = 0;
        private boolean noDocs = false;
        private long numFound = 0;

        @Override
        protected SolrResultRow computeNext() {
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private SolrResultRow convertToRow(SolrDocument doc) {
            String path = String.valueOf(doc.getFieldValue(configuration.getPathField()));
            if ("".equals(path)) {
                path = "/";
            }
            if (!parent.isEmpty()) {
                path = getAncestorPath(path, parentDepth);
                // avoid duplicate entries
                if (seenPaths.contains(path)) {
                    return null;
                }
                seenPaths.add(path);
            }
            float score = 0f;
            Object scoreObj = doc.get("score");
            if (scoreObj != null) {
                score = (Float) scoreObj;
            }
            return new SolrResultRow(path, score, doc, facetFields);
        }

        /**
         * Loads the Solr documents in batches.
         *
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            try {
                if (log.isDebugEnabled()) {
                    log.debug("converting filter {}", filter);
                }
                SolrQuery query = FilterQueryParser.getQuery(filter, plan, configuration);
                if (numFound > 0) {
                    long rows = configuration.getRows();
                    long maxQueries = numFound / 2;
                    if (maxQueries > configuration.getRows()) {
                        // adjust the rows to avoid making more than 3 Solr requests for this particular query
                        rows = maxQueries;
                        query.setParam("rows", String.valueOf(rows));
                    }
                    long newOffset = configuration.getRows() + offset * rows;
                    if (newOffset >= numFound) {
                        return false;
                    }
                    query.setParam("start", String.valueOf(newOffset));
                    offset++;
                }
                if (log.isDebugEnabled()) {
                    log.debug("sending query {}", query);
                }
                QueryResponse queryResponse = solrServer.query(query);
                if (log.isDebugEnabled()) {
                    log.debug("getting response {}", queryResponse.getHeader());
                }
                SolrDocumentList docs = queryResponse.getResults();
                if (docs != null) {
                    numFound = docs.getNumFound();
                    estimator.update(filter, docs);
                    Map<String, Map<String, List<String>>> highlighting = queryResponse.getHighlighting();
                    for (SolrDocument doc : docs) {
                        // handle highlight
                        if (highlighting != null) {
                            Object pathObject = doc.getFieldValue(configuration.getPathField());
                            if (pathObject != null && highlighting.get(String.valueOf(pathObject)) != null) {
                                Map<String, List<String>> value = highlighting.get(String.valueOf(pathObject));
                                for (Map.Entry<String, List<String>> entry : value.entrySet()) {
                                    // all highlighted values end up in 'rep:excerpt', regardless of field match
                                    for (String v : entry.getValue()) {
                                        doc.addField(QueryConstants.REP_EXCERPT, v);
                                    }
                                }
                            }
                        }
                        SolrResultRow row = convertToRow(doc);
                        if (row != null) {
                            queue.add(row);
                        }
                    }
                }
                // get facets
                List<FacetField> returnedFieldFacet = queryResponse.getFacetFields();
                if (returnedFieldFacet != null) {
                    facetFields.addAll(returnedFieldFacet);
                }
                // filter facets on doc paths
                if (!facetFields.isEmpty() && docs != null) {
                    for (SolrDocument doc : docs) {
                        String path = String.valueOf(doc.getFieldValue(configuration.getPathField()));
                        // if facet path doesn't exist for the calling user, filter the facet for this doc
                        for (FacetField ff : facetFields) {
                            if (!filter.isAccessible(path + "/" + ff.getName())) {
                                filterFacet(doc, ff);
                            }
                        }
                    }
                }
                // handle spellcheck
                SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse();
                if (spellCheckResponse != null && spellCheckResponse.getSuggestions() != null
                        && spellCheckResponse.getSuggestions().size() > 0) {
                    putSpellChecks(spellCheckResponse, queue, filter, configuration, solrServer);
                    noDocs = true;
                }
                // handle suggest
                NamedList<Object> response = queryResponse.getResponse();
                Map suggest = (Map) response.get("suggest");
                if (suggest != null) {
                    Set<Map.Entry<String, Object>> suggestEntries = suggest.entrySet();
                    if (!suggestEntries.isEmpty()) {
                        putSuggestions(suggestEntries, queue, filter, configuration, solrServer);
                        noDocs = true;
                    }
                }
            } catch (Exception e) {
                if (log.isWarnEnabled()) {
                    log.warn("query via {} failed.", solrServer, e);
                }
            }
            return !queue.isEmpty();
        }
    };
}
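Stripped of the Solr specifics, the iterator above follows a queue-draining pattern: `computeNext()` serves rows from an in-memory `Deque` and refills it one page at a time. A minimal sketch of that shape, where `fetchPage` is a hypothetical stand-in for the Solr request (not an Oak API):

import com.google.common.collect.AbstractIterator;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;
import java.util.function.IntFunction;

class PagedIterator {

    // Serve rows from a queue; when the queue runs dry, ask the page loader for the next page.
    static Iterator<String> paged(IntFunction<List<String>> fetchPage) {
        return new AbstractIterator<String>() {
            private final Deque<String> queue = new ArrayDeque<>();
            private int page = 0;

            @Override
            protected String computeNext() {
                if (!queue.isEmpty() || loadPage()) {
                    return queue.remove();
                }
                return endOfData();
            }

            private boolean loadPage() {
                queue.addAll(fetchPage.apply(page++)); // one "request" per page
                return !queue.isEmpty();
            }
        };
    }

    public static void main(String[] args) {
        // Two pages of two rows each, then an empty page to signal the end.
        List<List<String>> pages = List.of(List.of("/a", "/b"), List.of("/c", "/d"), List.of());
        paged(p -> pages.get(Math.min(p, pages.size() - 1))).forEachRemaining(System.out::println);
    }
}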
Use of com.google.common.collect.AbstractIterator in project atlasdb by palantir.
Class DbKvs, method loadColumnsForBatches.
private Iterator<Iterator<Map.Entry<Cell, Value>>> loadColumnsForBatches(
        TableReference tableRef,
        ColumnRangeSelection columnRangeSelection,
        long timestamp,
        Map<Sha256Hash, byte[]> rowHashesToBytes,
        Iterator<Map<Sha256Hash, Integer>> batches,
        Map<Sha256Hash, Integer> columnCountByRowHash) {
    Iterator<Iterator<Map.Entry<Cell, Value>>> results = new AbstractIterator<Iterator<Map.Entry<Cell, Value>>>() {

        private Sha256Hash lastRowHashInPreviousBatch = null;
        private byte[] lastColumnInPreviousBatch = null;

        @Override
        protected Iterator<Map.Entry<Cell, Value>> computeNext() {
            if (!batches.hasNext()) {
                return endOfData();
            }
            Map<Sha256Hash, Integer> currentBatch = batches.next();
            RowsColumnRangeBatchRequest columnRangeSelectionsByRow =
                    getBatchColumnRangeSelectionsByRow(currentBatch, columnCountByRowHash);
            Map<byte[], List<Map.Entry<Cell, Value>>> resultsByRow =
                    extractRowColumnRangePage(tableRef, columnRangeSelectionsByRow, timestamp);
            int totalEntries = resultsByRow.values().stream().mapToInt(List::size).sum();
            if (totalEntries == 0) {
                return Collections.emptyIterator();
            }
            // Ensure order matches that of the provided batch.
            List<Map.Entry<Cell, Value>> loadedColumns = new ArrayList<>(totalEntries);
            for (Sha256Hash rowHash : currentBatch.keySet()) {
                byte[] row = rowHashesToBytes.get(rowHash);
                loadedColumns.addAll(resultsByRow.get(row));
            }
            Cell lastCell = Iterables.getLast(loadedColumns).getKey();
            lastRowHashInPreviousBatch = Sha256Hash.computeHash(lastCell.getRowName());
            lastColumnInPreviousBatch = lastCell.getColumnName();
            return loadedColumns.iterator();
        }

        private RowsColumnRangeBatchRequest getBatchColumnRangeSelectionsByRow(
                Map<Sha256Hash, Integer> columnCountsByRowHashInBatch,
                Map<Sha256Hash, Integer> totalColumnCountsByRowHash) {
            ImmutableRowsColumnRangeBatchRequest.Builder rowsColumnRangeBatch =
                    ImmutableRowsColumnRangeBatchRequest.builder().columnRangeSelection(columnRangeSelection);
            Iterator<Map.Entry<Sha256Hash, Integer>> entries = columnCountsByRowHashInBatch.entrySet().iterator();
            while (entries.hasNext()) {
                Map.Entry<Sha256Hash, Integer> entry = entries.next();
                Sha256Hash rowHash = entry.getKey();
                byte[] row = rowHashesToBytes.get(rowHash);
                boolean isPartialFirstRow = Objects.equals(lastRowHashInPreviousBatch, rowHash);
                if (isPartialFirstRow) {
                    byte[] startCol = RangeRequests.nextLexicographicName(lastColumnInPreviousBatch);
                    BatchColumnRangeSelection columnRange =
                            BatchColumnRangeSelection.create(startCol, columnRangeSelection.getEndCol(), entry.getValue());
                    rowsColumnRangeBatch.partialFirstRow(Maps.immutableEntry(row, columnRange));
                    continue;
                }
                boolean isFullyLoadedRow = totalColumnCountsByRowHash.get(rowHash).equals(entry.getValue());
                if (isFullyLoadedRow) {
                    rowsColumnRangeBatch.addRowsToLoadFully(row);
                } else {
                    Preconditions.checkArgument(!entries.hasNext(), "Only the last row should be partial.");
                    BatchColumnRangeSelection columnRange =
                            BatchColumnRangeSelection.create(columnRangeSelection, entry.getValue());
                    rowsColumnRangeBatch.partialLastRow(Maps.immutableEntry(row, columnRange));
                }
            }
            return rowsColumnRangeBatch.build();
        }
    };
    return results;
}
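The method above yields an iterator of per-batch iterators. If a caller simply wants a single flat iterator over all loaded columns (an assumption about usage, not necessarily how atlasdb consumes it), Guava's `Iterators.concat` flattens it lazily:

import com.google.common.collect.Iterators;

import java.util.Iterator;
import java.util.List;

class FlattenBatches {

    // Flatten an Iterator of Iterators into one Iterator; batches are only consumed as needed.
    static <T> Iterator<T> flatten(Iterator<Iterator<T>> batches) {
        return Iterators.concat(batches);
    }

    public static void main(String[] args) {
        Iterator<Iterator<Integer>> batches =
                List.<Iterator<Integer>>of(List.of(1, 2).iterator(), List.of(3).iterator()).iterator();
        flatten(batches).forEachRemaining(System.out::println); // prints 1, 2, 3
    }
}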