
Example 6 with AbstractIterator

Use of com.google.common.collect.AbstractIterator in project grakn by graknlabs.

The class QueryParserImpl, method parseList.

/**
 * @param reader a reader representing several queries
 * @return a list of queries
 */
@Override
public <T extends Query<?>> Stream<T> parseList(Reader reader) {
    UnbufferedCharStream charStream = new UnbufferedCharStream(reader);
    GraqlErrorListener errorListener = GraqlErrorListener.withoutQueryString();
    GraqlLexer lexer = createLexer(charStream, errorListener);
    /*
        We tell the lexer to copy the text into each generated token.
        Normally when calling `Token#getText`, it will look into the underlying `TokenStream` and call
        `TokenStream#size` to check that it is in bounds. However, `UnbufferedTokenStream#size` is not supported
        (because then it would have to read the entire input). To avoid this issue, we set this flag, which
        copies the text into each `Token` so that `Token#getText` just looks up the copied text field.
    */
    lexer.setTokenFactory(new CommonTokenFactory(true));
    // Use an unbuffered token stream so we can handle extremely large input strings
    UnbufferedTokenStream tokenStream = new UnbufferedTokenStream(ChannelTokenSource.of(lexer));
    GraqlParser parser = createParser(tokenStream, errorListener);
    /*
        The "bail" error strategy prevents us reading all the way to the end of the input, e.g.

        ```
        match $x isa person; insert $x has name "Bob"; match $x isa movie; get;
                                                       ^
        ```

        In this example, when ANTLR reaches the indicated `match`, it considers two possibilities:

        1. this is the end of the query
        2. the user has made a mistake. Maybe they accidentally pasted the `match` here.

        Because of case 2, ANTLR will parse beyond the `match` in order to produce a more helpful error message.
        This causes memory issues for very large queries, so we use the simpler "bail" strategy that will
        immediately stop when it hits `match`.
    */
    parser.setErrorHandler(new BailErrorStrategy());
    // This is a lazy iterator that will only consume a single query at a time, without parsing any further.
    // This means it can pass arbitrarily long streams of queries in constant memory!
    Iterable<T> queryIterator = () -> new AbstractIterator<T>() {

        @Nullable
        @Override
        protected T computeNext() {
            int latestToken = tokenStream.LA(1);
            if (latestToken == Token.EOF) {
                endOfData();
                return null;
            } else {
                // When we next run it, it will start where it left off in the stream
                return (T) QUERY.parse(parser, errorListener);
            }
        }
    };
    return StreamSupport.stream(queryIterator.spliterator(), false);
}
Also used: CommonTokenFactory(org.antlr.v4.runtime.CommonTokenFactory) GraqlParser(ai.grakn.graql.internal.antlr.GraqlParser) BailErrorStrategy(org.antlr.v4.runtime.BailErrorStrategy) UnbufferedCharStream(org.antlr.v4.runtime.UnbufferedCharStream) AbstractIterator(com.google.common.collect.AbstractIterator) UnbufferedTokenStream(org.antlr.v4.runtime.UnbufferedTokenStream) GraqlLexer(ai.grakn.graql.internal.antlr.GraqlLexer)
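
The Stream-from-AbstractIterator wiring above can be reproduced in isolation. The sketch below is a minimal, self-contained version of the same pattern, assuming only Guava on the classpath; the BufferedReader and the line-by-line "parsing" are stand-ins for the ANTLR lexer and the Graql QUERY rule, and Spliterators.spliteratorUnknownSize replaces the Iterable lambda used in the original. It is not part of QueryParserImpl.

import com.google.common.collect.AbstractIterator;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public class LazyLineStream {

    // One "query" (here: one line) is produced per computeNext() call,
    // so arbitrarily long input is consumed in constant memory.
    public static Stream<String> parseList(Reader reader) {
        BufferedReader buffered = new BufferedReader(reader);
        AbstractIterator<String> iterator = new AbstractIterator<String>() {
            @Override
            protected String computeNext() {
                try {
                    String line = buffered.readLine();
                    return line == null ? endOfData() : line;
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
        };
        return StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(iterator, 0), false);
    }

    public static void main(String[] args) {
        parseList(new StringReader("match $x isa person; get;\nmatch $y isa movie; get;"))
                .forEach(System.out::println);
    }
}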

Example 7 with AbstractIterator

Use of com.google.common.collect.AbstractIterator in project elephant-bird by twitter.

The class LuceneIndexCollectAllRecordReader, method search.

/**
 * Applies {@link #docToValue(Document)} to every document
 * found by executing query over searcher
 *
 * @param searcher the index searcher to query
 * @param query the query to run
 * @return a list of values to be emitted as records (one by one) by this record reader
 * @throws IOException
 */
@Override
protected Iterator<T> search(final IndexSearcher searcher, final Query query) throws IOException {
    // grow the bit set if needed
    docIds.set(searcher.getIndexReader().maxDoc());
    // clear it
    docIds.clear();
    searcher.search(query, new Collector() {

        private int docBase;

        @Override
        public void setScorer(Scorer scorer) {
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        @Override
        public void collect(int doc) {
            docIds.set(doc + docBase);
        }

        @Override
        public void setNextReader(AtomicReaderContext context) {
            this.docBase = context.docBase;
        }
    });
    return new AbstractIterator<T>() {

        private int doc = docIds.nextSetBit(0);

        @Override
        protected T computeNext() {
            // casting to avoid Hadoop 2 incompatibility
            ((Progressable) context).progress();
            if (doc < 0) {
                return endOfData();
            }
            try {
                T ret = docToValue(searcher.doc(doc));
                doc = docIds.nextSetBit(doc + 1);
                return ret;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
Also used: Progressable(org.apache.hadoop.util.Progressable) Collector(org.apache.lucene.search.Collector) Scorer(org.apache.lucene.search.Scorer) AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) AtomicReaderContext(org.apache.lucene.index.AtomicReaderContext)
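
The iterator returned above walks the set bits of docIds via nextSetBit. Below is a standalone sketch of that traversal pattern, using a plain java.util.BitSet as a stand-in for the record reader's doc-id set; the progress reporting and docToValue lookup are omitted, and none of the names belong to elephant-bird.

import com.google.common.collect.AbstractIterator;

import java.util.BitSet;
import java.util.Iterator;

public class BitSetIterators {

    // Yields each set bit index lazily; endOfData() is reached when
    // nextSetBit returns -1, mirroring the record reader above.
    public static Iterator<Integer> setBits(BitSet bits) {
        return new AbstractIterator<Integer>() {
            private int next = bits.nextSetBit(0);

            @Override
            protected Integer computeNext() {
                if (next < 0) {
                    return endOfData();
                }
                int current = next;
                next = bits.nextSetBit(current + 1);
                return current;
            }
        };
    }

    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(3);
        bits.set(7);
        setBits(bits).forEachRemaining(System.out::println); // prints 3, then 7
    }
}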

Example 8 with AbstractIterator

Use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.

The class MongoBlobStore, method getAllChunkIds.

@Override
public Iterator<String> getAllChunkIds(long maxLastModifiedTime) throws Exception {
    Bson fields = new BasicDBObject(MongoBlob.KEY_ID, 1);
    Bson hint = new BasicDBObject("$hint", fields);
    Bson query = new Document();
    if (maxLastModifiedTime != 0 && maxLastModifiedTime != -1) {
        query = Filters.lte(MongoBlob.KEY_LAST_MOD, maxLastModifiedTime);
    }
    final MongoCursor<MongoBlob> cur = getBlobCollection().find(query).projection(fields).modifiers(hint).iterator();
    // TODO The cursor needs to be closed
    return new AbstractIterator<String>() {

        @Override
        protected String computeNext() {
            if (cur.hasNext()) {
                MongoBlob blob = cur.next();
                if (blob != null) {
                    return blob.getId();
                }
            }
            return endOfData();
        }
    };
}
Also used: BasicDBObject(com.mongodb.BasicDBObject) AbstractIterator(com.google.common.collect.AbstractIterator) Document(org.bson.Document) Bson(org.bson.conversions.Bson)
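
Regarding the TODO above: one possible approach is to wrap the cursor so it is closed as soon as the iterator runs out of elements. The helper below is only an illustrative sketch, not part of MongoBlobStore; the generic Closeable stands in for the MongoCursor, and a caller that abandons iteration early would still have to close the resource itself.

import com.google.common.collect.AbstractIterator;

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;

public final class ClosingIterators {

    // Returns the elements of source, closing the resource once the
    // source is exhausted (i.e. when endOfData() is reached).
    public static <T> Iterator<T> closing(Iterator<T> source, Closeable resource) {
        return new AbstractIterator<T>() {
            @Override
            protected T computeNext() {
                if (source.hasNext()) {
                    return source.next();
                }
                try {
                    resource.close();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                return endOfData();
            }
        };
    }
}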

Example 9 with AbstractIterator

Use of com.google.common.collect.AbstractIterator in project jackrabbit-oak by apache.

The class SolrQueryIndex, method getIterator.

private AbstractIterator<SolrResultRow> getIterator(final Filter filter, final IndexPlan plan, final String parent, final int parentDepth, final OakSolrConfiguration configuration, final SolrClient solrServer, final LMSEstimator estimator) {
    return new AbstractIterator<SolrResultRow>() {

        public Collection<FacetField> facetFields = new LinkedList<FacetField>();

        private final Set<String> seenPaths = Sets.newHashSet();

        private final Deque<SolrResultRow> queue = Queues.newArrayDeque();

        private int offset = 0;

        private boolean noDocs = false;

        private long numFound = 0;

        @Override
        protected SolrResultRow computeNext() {
            if (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private SolrResultRow convertToRow(SolrDocument doc) {
            String path = String.valueOf(doc.getFieldValue(configuration.getPathField()));
            if ("".equals(path)) {
                path = "/";
            }
            if (!parent.isEmpty()) {
                path = getAncestorPath(path, parentDepth);
                // avoid duplicate entries
                if (seenPaths.contains(path)) {
                    return null;
                }
                seenPaths.add(path);
            }
            float score = 0f;
            Object scoreObj = doc.get("score");
            if (scoreObj != null) {
                score = (Float) scoreObj;
            }
            return new SolrResultRow(path, score, doc, facetFields);
        }

        /**
         * Loads the Solr documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            try {
                if (log.isDebugEnabled()) {
                    log.debug("converting filter {}", filter);
                }
                SolrQuery query = FilterQueryParser.getQuery(filter, plan, configuration);
                if (numFound > 0) {
                    long rows = configuration.getRows();
                    long maxQueries = numFound / 2;
                    if (maxQueries > configuration.getRows()) {
                        // adjust the rows to avoid making more than 3 Solr requests for this particular query
                        rows = maxQueries;
                        query.setParam("rows", String.valueOf(rows));
                    }
                    long newOffset = configuration.getRows() + offset * rows;
                    if (newOffset >= numFound) {
                        return false;
                    }
                    query.setParam("start", String.valueOf(newOffset));
                    offset++;
                }
                if (log.isDebugEnabled()) {
                    log.debug("sending query {}", query);
                }
                QueryResponse queryResponse = solrServer.query(query);
                if (log.isDebugEnabled()) {
                    log.debug("getting response {}", queryResponse.getHeader());
                }
                SolrDocumentList docs = queryResponse.getResults();
                if (docs != null) {
                    numFound = docs.getNumFound();
                    estimator.update(filter, docs);
                    Map<String, Map<String, List<String>>> highlighting = queryResponse.getHighlighting();
                    for (SolrDocument doc : docs) {
                        // handle highlight
                        if (highlighting != null) {
                            Object pathObject = doc.getFieldValue(configuration.getPathField());
                            if (pathObject != null && highlighting.get(String.valueOf(pathObject)) != null) {
                                Map<String, List<String>> value = highlighting.get(String.valueOf(pathObject));
                                for (Map.Entry<String, List<String>> entry : value.entrySet()) {
                                    // all highlighted values end up in 'rep:excerpt', regardless of field match
                                    for (String v : entry.getValue()) {
                                        doc.addField(QueryConstants.REP_EXCERPT, v);
                                    }
                                }
                            }
                        }
                        SolrResultRow row = convertToRow(doc);
                        if (row != null) {
                            queue.add(row);
                        }
                    }
                }
                // get facets
                List<FacetField> returnedFieldFacet = queryResponse.getFacetFields();
                if (returnedFieldFacet != null) {
                    facetFields.addAll(returnedFieldFacet);
                }
                // filter facets on doc paths
                if (!facetFields.isEmpty() && docs != null) {
                    for (SolrDocument doc : docs) {
                        String path = String.valueOf(doc.getFieldValue(configuration.getPathField()));
                        // if facet path doesn't exist for the calling user, filter the facet for this doc
                        for (FacetField ff : facetFields) {
                            if (!filter.isAccessible(path + "/" + ff.getName())) {
                                filterFacet(doc, ff);
                            }
                        }
                    }
                }
                // handle spellcheck
                SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse();
                if (spellCheckResponse != null && spellCheckResponse.getSuggestions() != null && spellCheckResponse.getSuggestions().size() > 0) {
                    putSpellChecks(spellCheckResponse, queue, filter, configuration, solrServer);
                    noDocs = true;
                }
                // handle suggest
                NamedList<Object> response = queryResponse.getResponse();
                Map suggest = (Map) response.get("suggest");
                if (suggest != null) {
                    Set<Map.Entry<String, Object>> suggestEntries = suggest.entrySet();
                    if (!suggestEntries.isEmpty()) {
                        putSuggestions(suggestEntries, queue, filter, configuration, solrServer);
                        noDocs = true;
                    }
                }
            } catch (Exception e) {
                if (log.isWarnEnabled()) {
                    log.warn("query via {} failed.", solrServer, e);
                }
            }
            return !queue.isEmpty();
        }
    };
}
Also used: Set(java.util.Set) HashSet(java.util.HashSet) FacetField(org.apache.solr.client.solrj.response.FacetField) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SpellCheckResponse(org.apache.solr.client.solrj.response.SpellCheckResponse) SolrDocument(org.apache.solr.common.SolrDocument) SolrDocumentList(org.apache.solr.common.SolrDocumentList) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) NamedList(org.apache.solr.common.util.NamedList) AbstractIterator(com.google.common.collect.AbstractIterator) Deque(java.util.Deque) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) Collection(java.util.Collection) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) WeakHashMap(java.util.WeakHashMap)
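
The computeNext()/loadDocs() pair above is an instance of a general refill-a-queue pattern: keep a Deque of buffered rows and fetch the next page only when the queue drains. The sketch below isolates that pattern under stated assumptions; PageSource and pageSize are hypothetical stand-ins for the Solr query, the offset handling, and OakSolrConfiguration.getRows(), and nothing here comes from SolrQueryIndex itself.

import com.google.common.collect.AbstractIterator;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;

public class PagedIterators {

    // Hypothetical paged data source: returns at most pageSize items
    // starting at the given offset; an empty list means no more data.
    public interface PageSource<T> {
        List<T> fetch(int offset, int pageSize);
    }

    public static <T> Iterator<T> paged(PageSource<T> source, int pageSize) {
        return new AbstractIterator<T>() {
            private final Deque<T> queue = new ArrayDeque<>();
            private int offset = 0;

            @Override
            protected T computeNext() {
                // Refill only when the buffered page is exhausted.
                if (!queue.isEmpty() || loadPage()) {
                    return queue.remove();
                }
                return endOfData();
            }

            private boolean loadPage() {
                List<T> page = source.fetch(offset, pageSize);
                offset += page.size();
                queue.addAll(page);
                return !queue.isEmpty();
            }
        };
    }
}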

Example 10 with AbstractIterator

Use of com.google.common.collect.AbstractIterator in project atlasdb by palantir.

The class DbKvs, method loadColumnsForBatches.

private Iterator<Iterator<Map.Entry<Cell, Value>>> loadColumnsForBatches(TableReference tableRef, ColumnRangeSelection columnRangeSelection, long timestamp, Map<Sha256Hash, byte[]> rowHashesToBytes, Iterator<Map<Sha256Hash, Integer>> batches, Map<Sha256Hash, Integer> columnCountByRowHash) {
    Iterator<Iterator<Map.Entry<Cell, Value>>> results = new AbstractIterator<Iterator<Map.Entry<Cell, Value>>>() {

        private Sha256Hash lastRowHashInPreviousBatch = null;

        private byte[] lastColumnInPreviousBatch = null;

        @Override
        protected Iterator<Map.Entry<Cell, Value>> computeNext() {
            if (!batches.hasNext()) {
                return endOfData();
            }
            Map<Sha256Hash, Integer> currentBatch = batches.next();
            RowsColumnRangeBatchRequest columnRangeSelectionsByRow = getBatchColumnRangeSelectionsByRow(currentBatch, columnCountByRowHash);
            Map<byte[], List<Map.Entry<Cell, Value>>> resultsByRow = extractRowColumnRangePage(tableRef, columnRangeSelectionsByRow, timestamp);
            int totalEntries = resultsByRow.values().stream().mapToInt(List::size).sum();
            if (totalEntries == 0) {
                return Collections.emptyIterator();
            }
            // Ensure order matches that of the provided batch.
            List<Map.Entry<Cell, Value>> loadedColumns = new ArrayList<>(totalEntries);
            for (Sha256Hash rowHash : currentBatch.keySet()) {
                byte[] row = rowHashesToBytes.get(rowHash);
                loadedColumns.addAll(resultsByRow.get(row));
            }
            Cell lastCell = Iterables.getLast(loadedColumns).getKey();
            lastRowHashInPreviousBatch = Sha256Hash.computeHash(lastCell.getRowName());
            lastColumnInPreviousBatch = lastCell.getColumnName();
            return loadedColumns.iterator();
        }

        private RowsColumnRangeBatchRequest getBatchColumnRangeSelectionsByRow(Map<Sha256Hash, Integer> columnCountsByRowHashInBatch, Map<Sha256Hash, Integer> totalColumnCountsByRowHash) {
            ImmutableRowsColumnRangeBatchRequest.Builder rowsColumnRangeBatch = ImmutableRowsColumnRangeBatchRequest.builder().columnRangeSelection(columnRangeSelection);
            Iterator<Map.Entry<Sha256Hash, Integer>> entries = columnCountsByRowHashInBatch.entrySet().iterator();
            while (entries.hasNext()) {
                Map.Entry<Sha256Hash, Integer> entry = entries.next();
                Sha256Hash rowHash = entry.getKey();
                byte[] row = rowHashesToBytes.get(rowHash);
                boolean isPartialFirstRow = Objects.equals(lastRowHashInPreviousBatch, rowHash);
                if (isPartialFirstRow) {
                    byte[] startCol = RangeRequests.nextLexicographicName(lastColumnInPreviousBatch);
                    BatchColumnRangeSelection columnRange = BatchColumnRangeSelection.create(startCol, columnRangeSelection.getEndCol(), entry.getValue());
                    rowsColumnRangeBatch.partialFirstRow(Maps.immutableEntry(row, columnRange));
                    continue;
                }
                boolean isFullyLoadedRow = totalColumnCountsByRowHash.get(rowHash).equals(entry.getValue());
                if (isFullyLoadedRow) {
                    rowsColumnRangeBatch.addRowsToLoadFully(row);
                } else {
                    Preconditions.checkArgument(!entries.hasNext(), "Only the last row should be partial.");
                    BatchColumnRangeSelection columnRange = BatchColumnRangeSelection.create(columnRangeSelection, entry.getValue());
                    rowsColumnRangeBatch.partialLastRow(Maps.immutableEntry(row, columnRange));
                }
            }
            return rowsColumnRangeBatch.build();
        }
    };
    return results;
}
Also used: BatchColumnRangeSelection(com.palantir.atlasdb.keyvalue.api.BatchColumnRangeSelection) Sha256Hash(com.palantir.util.crypto.Sha256Hash) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry) AbstractIterator(com.google.common.collect.AbstractIterator) LocalRowColumnRangeIterator(com.palantir.atlasdb.keyvalue.impl.LocalRowColumnRangeIterator) ListIterator(java.util.ListIterator) ClosableIterator(com.palantir.common.base.ClosableIterator) RowColumnRangeIterator(com.palantir.atlasdb.keyvalue.api.RowColumnRangeIterator) Iterator(java.util.Iterator) Value(com.palantir.atlasdb.keyvalue.api.Value) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) NavigableMap(java.util.NavigableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Cell(com.palantir.atlasdb.keyvalue.api.Cell)
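
A usage note on the shape of the result: an Iterator<Iterator<...>> like the one returned above can be flattened lazily with Guava's Iterators.concat while still materializing only one batch at a time. The sketch below demonstrates this with plain integer batches; the types and data are illustrative stand-ins, not AtlasDB's Cell/Value machinery.

import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterators;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class BatchFlattening {

    // Emits the batches lazily; each inner iterator is only produced
    // when the outer computeNext() asks for it.
    static Iterator<Iterator<Integer>> batches(List<List<Integer>> source) {
        Iterator<List<Integer>> outer = source.iterator();
        return new AbstractIterator<Iterator<Integer>>() {
            @Override
            protected Iterator<Integer> computeNext() {
                return outer.hasNext() ? outer.next().iterator() : endOfData();
            }
        };
    }

    public static void main(String[] args) {
        Iterator<Integer> flat = Iterators.concat(
                batches(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3))));
        flat.forEachRemaining(System.out::println); // prints 1, 2, 3
    }
}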

Aggregations

AbstractIterator (com.google.common.collect.AbstractIterator): 55 usages
IOException (java.io.IOException): 15 usages
Iterator (java.util.Iterator): 14 usages
Map (java.util.Map): 8 usages
ArrayList (java.util.ArrayList): 7 usages
List (java.util.List): 6 usages
File (java.io.File): 5 usages
EOFException (java.io.EOFException): 4 usages
Collection (java.util.Collection): 4 usages
HashSet (java.util.HashSet): 4 usages
BIGINT (com.facebook.presto.common.type.BigintType.BIGINT): 3 usages
SMALLINT (com.facebook.presto.common.type.SmallintType.SMALLINT): 3 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 3 usages
Deque (java.util.Deque): 3 usages
Set (java.util.Set): 3 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 3 usages
CyclicBarrier (java.util.concurrent.CyclicBarrier): 3 usages
Test (org.junit.Test): 3 usages
TopicMetadata (co.cask.cdap.messaging.TopicMetadata): 2 usages
TopicId (co.cask.cdap.proto.id.TopicId): 2 usages