Search in sources :

Example 1 with SharedShardContext

use of io.crate.execution.jobs.SharedShardContext in project crate by crate.

the class ShardCollectSource method createMultiShardScoreDocCollector.

/**
 * Creates a future for a single ordered {@link BatchIterator} built from the ordered
 * doc collectors of all shards that the routing of {@code collectPhase} assigns to
 * {@code localNodeId}.
 *
 * <p>An index that has no metadata in the current cluster state is skipped if its name
 * denotes a partition; otherwise an {@link IndexNotFoundException} is thrown. The same
 * distinction is applied when creating a shard's context or collector raises an
 * {@link IndexNotFoundException}.
 *
 * @param collectPhase       the phase to collect; its {@code orderBy()} must not be null
 * @param supportMoveToStart passed through to the per-shard collectors and the resulting iterator
 * @param collectTask        provides the shared shard contexts and the RAM accounting
 * @param localNodeId        id of the node whose routed shards are collected
 * @return a future completing with the merged, ordered iterator once all per-shard
 *         collector futures have completed
 * @throws IndexNotFoundException if a non-partitioned index is missing
 */
private CompletableFuture<BatchIterator<Row>> createMultiShardScoreDocCollector(RoutedCollectPhase collectPhase, boolean supportMoveToStart, CollectTask collectTask, String localNodeId) {
    Map<String, Map<String, IntIndexedContainer>> locations = collectPhase.routing().locations();
    SharedShardContexts sharedShardContexts = collectTask.sharedShardContexts();
    Map<String, IntIndexedContainer> indexShards = locations.get(localNodeId);
    List<CompletableFuture<OrderedDocCollector>> orderedDocCollectors = new ArrayList<>();
    Metadata metadata = clusterService.state().metadata();
    for (Map.Entry<String, IntIndexedContainer> entry : indexShards.entrySet()) {
        String indexName = entry.getKey();
        // The index may be gone from the cluster state by now; treat that like the
        // IndexNotFoundException handling below instead of failing with a NullPointerException.
        var indexMetadata = metadata.index(indexName);
        if (indexMetadata == null) {
            if (IndexParts.isPartitioned(indexName)) {
                continue;
            }
            throw new IndexNotFoundException(indexName);
        }
        Index index = indexMetadata.getIndex();
        for (IntCursor shard : entry.getValue()) {
            ShardId shardId = new ShardId(index, shard.value);
            try {
                SharedShardContext context = sharedShardContexts.getOrCreateContext(shardId);
                ShardCollectorProvider shardCollectorProvider = getCollectorProviderSafe(shardId);
                orderedDocCollectors.add(shardCollectorProvider.getFutureOrderedCollector(collectPhase, context, collectTask, supportMoveToStart));
            } catch (ShardNotFoundException | IllegalIndexShardStateException e) {
                // Shard-level failures are propagated unchanged to the caller.
                throw e;
            } catch (IndexNotFoundException e) {
                if (IndexParts.isPartitioned(indexName)) {
                    // Missing partition: stop processing the remaining shards of this index.
                    break;
                }
                throw e;
            }
        }
    }
    List<DataType<?>> columnTypes = Symbols.typeView(collectPhase.toCollect());
    OrderBy orderBy = collectPhase.orderBy();
    assert orderBy != null : "orderBy must not be null";
    return CompletableFutures.allAsList(orderedDocCollectors).thenApply(collectors -> OrderedLuceneBatchIteratorFactory.newInstance(
        collectors,
        OrderingByPosition.rowOrdering(
            OrderByPositionVisitor.orderByPositions(orderBy.orderBySymbols(), collectPhase.toCollect()),
            orderBy.reverseFlags(),
            orderBy.nullsFirst()),
        new RowAccountingWithEstimators(columnTypes, collectTask.getRamAccounting()),
        executor,
        availableThreads,
        supportMoveToStart));
}
Also used : OrderBy(io.crate.analyze.OrderBy) ArrayList(java.util.ArrayList) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) Index(org.elasticsearch.index.Index) IllegalIndexShardStateException(org.elasticsearch.index.shard.IllegalIndexShardStateException) ShardId(org.elasticsearch.index.shard.ShardId) CompletableFuture(java.util.concurrent.CompletableFuture) SharedShardContexts(io.crate.execution.jobs.SharedShardContexts) ShardNotFoundException(org.elasticsearch.index.shard.ShardNotFoundException) RowAccountingWithEstimators(io.crate.breaker.RowAccountingWithEstimators) IntCursor(com.carrotsearch.hppc.cursors.IntCursor) ShardCollectorProvider(io.crate.execution.engine.collect.ShardCollectorProvider) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) DataType(io.crate.types.DataType) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SharedShardContext(io.crate.execution.jobs.SharedShardContext)

Example 2 with SharedShardContext

use of io.crate.execution.jobs.SharedShardContext in project crate by crate.

the class FetchTask method start.

/**
 * Starts this task by registering a {@link SharedShardContext} for every shard routed to
 * the local node whose index has a base in {@code phase.bases()}, and by acquiring a
 * searcher for each shard whose table has fetch references.
 *
 * <p>If the task was killed before, {@code result} is completed exceptionally with the
 * kill cause and nothing else happens. Missing partitioned indices are skipped, missing
 * non-partitioned indices raise an {@link IndexNotFoundException}.
 */
@Override
public void start() {
    synchronized (jobId) {
        if (killed != null) {
            result.completeExceptionally(killed);
            return;
        }

        // index name -> owning relation
        HashMap<String, RelationName> relationByIndex = new HashMap<>();
        for (Map.Entry<RelationName, Collection<String>> tableEntry : phase.tableIndices().entrySet()) {
            for (String tableIndex : tableEntry.getValue()) {
                relationByIndex.put(tableIndex, tableEntry.getKey());
            }
        }

        // relations that have at least one reference to fetch
        Set<RelationName> relationsWithFetchRefs = new HashSet<>();
        for (Reference fetchRef : phase.fetchRefs()) {
            relationsWithFetchRefs.add(fetchRef.ident().tableIdent());
        }

        String searcherSource = "fetch-task: " + jobId.toString() + '-' + phase.phaseId() + '-' + phase.name();

        for (Routing routing : routingIterable) {
            Map<String, Map<String, IntIndexedContainer>> locations = routing.locations();
            Map<String, IntIndexedContainer> localShardsByIndex = locations.get(localNodeId);
            for (Map.Entry<String, IntIndexedContainer> shardsOfIndex : localShardsByIndex.entrySet()) {
                String indexName = shardsOfIndex.getKey();
                Integer base = phase.bases().get(indexName);
                if (base == null) {
                    continue;
                }
                IndexMetadata indexMetadata = metadata.index(indexName);
                if (indexMetadata == null) {
                    if (IndexParts.isPartitioned(indexName)) {
                        continue;
                    }
                    throw new IndexNotFoundException(indexName);
                }
                Index index = indexMetadata.getIndex();
                RelationName relation = relationByIndex.get(indexName);
                assert relation != null : "no relationName found for index " + indexName;
                tableIdents.put(base, relation);
                toFetch.put(relation, new ArrayList<>());

                for (IntCursor shardCursor : shardsOfIndex.getValue()) {
                    ShardId shardId = new ShardId(index, shardCursor.value);
                    int readerId = base + shardId.id();
                    if (shardContexts.get(readerId) != null) {
                        // a context for this reader id has already been registered
                        continue;
                    }
                    try {
                        SharedShardContext createdContext = sharedShardContexts.createContext(shardId, readerId);
                        shardContexts.put(readerId, createdContext);
                        if (relationsWithFetchRefs.contains(relation)) {
                            searchers.put(readerId, createdContext.acquireSearcher(searcherSource));
                        }
                    } catch (IndexNotFoundException e) {
                        if (IndexParts.isPartitioned(indexName)) {
                            continue;
                        }
                        throw e;
                    }
                }
            }
        }

        // attach every fetch reference to the list of its relation, if one was registered
        for (Reference fetchRef : phase.fetchRefs()) {
            Collection<Reference> refsOfRelation = toFetch.get(fetchRef.ident().tableIdent());
            if (refsOfRelation != null) {
                refsOfRelation.add(fetchRef);
            }
        }
    }

    boolean fetchRequestsExpected = !searchers.isEmpty() && !phase.fetchRefs().isEmpty();
    if (!fetchRequestsExpected) {
        // Without fetch references no fetch requests will arrive. In that case this
        // context only exists so that collectors can generate doc ids with the right
        // bases; those bases are fetched in the prepare phase, so it can be closed now.
        close();
    }
}
Also used : HashMap(java.util.HashMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) Index(org.elasticsearch.index.Index) ShardId(org.elasticsearch.index.shard.ShardId) IntCursor(com.carrotsearch.hppc.cursors.IntCursor) RelationName(io.crate.metadata.RelationName) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet) Reference(io.crate.metadata.Reference) Routing(io.crate.metadata.Routing) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) Collection(java.util.Collection) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) SharedShardContext(io.crate.execution.jobs.SharedShardContext)

Example 3 with SharedShardContext

use of io.crate.execution.jobs.SharedShardContext in project crate by crate.

the class GroupByOptimizedIterator method tryOptimizeSingleStringKey.

/**
 * Tries to create an optimized group-by iterator for a collect phase whose shard
 * projections contain a single-string-key {@link GroupProjection}.
 *
 * @return the optimized iterator, or {@code null} if any precondition checked below
 *         does not hold and the optimization therefore does not apply
 */
@Nullable
static BatchIterator<Row> tryOptimizeSingleStringKey(IndexShard indexShard, DocTableInfo table, LuceneQueryBuilder luceneQueryBuilder, FieldTypeLookup fieldTypeLookup, BigArrays bigArrays, InputFactory inputFactory, DocInputFactory docInputFactory, RoutedCollectPhase collectPhase, CollectTask collectTask) {
    Collection<? extends Projection> projectionsOnShard = shardProjections(collectPhase.projections());
    GroupProjection grouping = getSingleStringKeyGroupProjection(projectionsOnShard);
    if (grouping == null) {
        return null;
    }
    assert grouping.keys().size() == 1 : "Must have 1 key if getSingleStringKeyGroupProjection returned a projection";

    Reference groupKey = getKeyRef(collectPhase.toCollect(), grouping.keys().get(0));
    if (groupKey == null) {
        // the group key is not a reference
        return null;
    }
    groupKey = (Reference) DocReferences.inverseSourceLookup(groupKey);

    MappedFieldType groupKeyFieldType = fieldTypeLookup.get(groupKey.column().fqn());
    if (groupKeyFieldType == null || !groupKeyFieldType.hasDocValues()) {
        return null;
    }

    boolean usesScore = Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE)
        || Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE);
    if (usesScore) {
        // not supported here, which keeps the optimized implementation a bit simpler
        return null;
    }

    if (hasHighCardinalityRatio(() -> indexShard.acquireSearcher("group-by-cardinality-check"), groupKeyFieldType.name())) {
        return null;
    }

    ShardId shardId = indexShard.shardId();
    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(shardId);
    var acquiredSearcher = shardContext.acquireSearcher("group-by-ordinals:" + formatSource(collectPhase));
    // hand the searcher over to the collect task
    collectTask.addSearcher(shardContext.readerId(), acquiredSearcher);
    final QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();

    InputFactory.Context<? extends LuceneCollectorExpression<?>> docInputs = docInputFactory.getCtx(collectTask.txnCtx());
    docInputs.add(collectPhase.toCollect().stream()::iterator);

    InputFactory.Context<CollectExpression<Row, ?>> aggregationInputs = inputFactory.ctxForAggregations(collectTask.txnCtx());
    aggregationInputs.add(grouping.values());
    final List<CollectExpression<Row, ?>> aggregationExpressions = aggregationInputs.expressions();
    List<AggregationContext> aggregationContexts = aggregationInputs.aggregations();

    List<? extends LuceneCollectorExpression<?>> docExpressions = docInputs.expressions();
    RamAccounting ramAccounting = collectTask.getRamAccounting();
    CollectorContext collectorContext = new CollectorContext(shardContext.readerId());
    InputRow inputRow = new InputRow(docInputs.topLevelInputs());

    LuceneQueryBuilder.Context luceneQuery = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        indexShard.mapperService(),
        indexShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());

    return getIterator(
        bigArrays,
        acquiredSearcher.item(),
        groupKey.column().fqn(),
        aggregationContexts,
        docExpressions,
        aggregationExpressions,
        ramAccounting,
        collectTask.memoryManager(),
        collectTask.minNodeVersion(),
        inputRow,
        luceneQuery.query(),
        collectorContext,
        grouping.mode());
}
Also used : AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) InputFactory(io.crate.expression.InputFactory) RamAccounting(io.crate.breaker.RamAccounting) AtomicReference(java.util.concurrent.atomic.AtomicReference) Reference(io.crate.metadata.Reference) ShardId(org.elasticsearch.index.shard.ShardId) LuceneQueryBuilder(io.crate.lucene.LuceneQueryBuilder) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) InputRow(io.crate.expression.InputRow) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) SharedShardContext(io.crate.execution.jobs.SharedShardContext) Nullable(javax.annotation.Nullable)

Example 4 with SharedShardContext

use of io.crate.execution.jobs.SharedShardContext in project crate by crate.

the class LuceneShardCollectorProvider method getUnorderedIterator.

/**
 * Creates an unordered iterator over the documents of this provider's shard that match
 * the {@code where} clause of {@code collectPhase}.
 *
 * <p>The searcher acquired for the shard is registered with {@code collectTask}. If the
 * shard has no mapper service, an empty iterator is returned instead.
 */
@Override
protected BatchIterator<Row> getUnorderedIterator(RoutedCollectPhase collectPhase, boolean requiresScroll, CollectTask collectTask) {
    ShardId shardId = indexShard.shardId();
    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(shardId);
    var acquiredSearcher = shardContext.acquireSearcher("unordered-iterator: " + formatSource(collectPhase));
    collectTask.addSearcher(shardContext.readerId(), acquiredSearcher);

    IndexShard contextShard = shardContext.indexShard();
    if (contextShard.mapperService() == null) {
        // A shard without mapper service is closed and cannot be queried with lucene,
        // so there is nothing to iterate over.
        return InMemoryBatchIterator.empty(SentinelRow.SENTINEL);
    }

    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();
    LuceneQueryBuilder.Context luceneQuery = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        contextShard.mapperService(),
        contextShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());
    InputFactory.Context<? extends LuceneCollectorExpression<?>> docInputs = docInputFactory.extractImplementations(collectTask.txnCtx(), collectPhase);

    return new LuceneBatchIterator(
        acquiredSearcher.item(),
        luceneQuery.query(),
        luceneQuery.minScore(),
        Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE),
        new CollectorContext(shardContext.readerId()),
        docInputs.topLevelInputs(),
        docInputs.expressions());
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) InputFactory(io.crate.expression.InputFactory) IndexShard(org.elasticsearch.index.shard.IndexShard) LuceneQueryBuilder(io.crate.lucene.LuceneQueryBuilder) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) SharedShardContext(io.crate.execution.jobs.SharedShardContext) LuceneBatchIterator(io.crate.execution.engine.collect.collectors.LuceneBatchIterator)

Example 5 with SharedShardContext

use of io.crate.execution.jobs.SharedShardContext in project crate by crate.

the class DocValuesGroupByOptimizedIterator method tryOptimize.

/**
 * Tries to create a group-by iterator that works on doc values for both the group keys
 * and the aggregations of the collect phase.
 *
 * @return the optimized iterator, or {@code null} if any precondition checked below
 *         does not hold and the optimization therefore does not apply
 */
@Nullable
static BatchIterator<Row> tryOptimize(Functions functions, IndexShard indexShard, DocTableInfo table, LuceneQueryBuilder luceneQueryBuilder, FieldTypeLookup fieldTypeLookup, DocInputFactory docInputFactory, RoutedCollectPhase collectPhase, CollectTask collectTask) {
    boolean usesScore = Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE)
        || Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE);
    if (usesScore) {
        return null;
    }

    Collection<? extends Projection> projectionsOnShard = shardProjections(collectPhase.projections());
    GroupProjection grouping = getSinglePartialGroupProjection(projectionsOnShard);
    if (grouping == null) {
        return null;
    }

    // Every group key must resolve to a column reference whose field has doc values.
    ArrayList<Reference> groupKeyColumns = new ArrayList<>(grouping.keys().size());
    for (var groupKey : grouping.keys()) {
        var docRef = getKeyRef(collectPhase.toCollect(), groupKey);
        if (docRef == null) {
            // the group key is not a reference
            return null;
        }
        var columnRef = (Reference) DocReferences.inverseSourceLookup(docRef);
        var fieldType = fieldTypeLookup.get(columnRef.column().fqn());
        if (fieldType == null || !fieldType.hasDocValues()) {
            return null;
        }
        groupKeyColumns.add(columnRef);
    }

    // noinspection rawtypes
    List<DocValueAggregator> docValueAggregators = DocValuesAggregates.createAggregators(
        functions,
        grouping.values(),
        collectPhase.toCollect(),
        collectTask.txnCtx().sessionSettings().searchPath(),
        table);
    if (docValueAggregators == null) {
        return null;
    }

    ShardId shardId = indexShard.shardId();
    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(shardId);
    var acquiredSearcher = shardContext.acquireSearcher("group-by-doc-value-aggregates: " + formatSource(collectPhase));
    // hand the searcher over to the collect task
    collectTask.addSearcher(shardContext.readerId(), acquiredSearcher);
    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();

    InputFactory.Context<? extends LuceneCollectorExpression<?>> docInputs = docInputFactory.getCtx(collectTask.txnCtx());
    List<LuceneCollectorExpression<?>> groupKeyExpressions = new ArrayList<>();
    for (var keyColumn : groupKeyColumns) {
        groupKeyExpressions.add((LuceneCollectorExpression<?>) docInputs.add(keyColumn));
    }

    LuceneQueryBuilder.Context luceneQuery = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        indexShard.mapperService(),
        indexShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());

    if (groupKeyColumns.size() != 1) {
        return GroupByIterator.forManyKeys(
            docValueAggregators,
            acquiredSearcher.item(),
            groupKeyColumns,
            groupKeyExpressions,
            collectTask.getRamAccounting(),
            collectTask.memoryManager(),
            collectTask.minNodeVersion(),
            luceneQuery.query(),
            new CollectorContext(shardContext.readerId()));
    }
    return GroupByIterator.forSingleKey(
        docValueAggregators,
        acquiredSearcher.item(),
        groupKeyColumns.get(0),
        groupKeyExpressions,
        collectTask.getRamAccounting(),
        collectTask.memoryManager(),
        collectTask.minNodeVersion(),
        luceneQuery.query(),
        new CollectorContext(shardContext.readerId()));
}
Also used : InputFactory(io.crate.expression.InputFactory) DocValueAggregator(io.crate.execution.engine.aggregation.DocValueAggregator) AtomicReference(java.util.concurrent.atomic.AtomicReference) Reference(io.crate.metadata.Reference) ArrayList(java.util.ArrayList) ShardId(org.elasticsearch.index.shard.ShardId) LuceneQueryBuilder(io.crate.lucene.LuceneQueryBuilder) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) SharedShardContext(io.crate.execution.jobs.SharedShardContext) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) Nullable(javax.annotation.Nullable)

Aggregations

SharedShardContext (io.crate.execution.jobs.SharedShardContext)5 ShardId (org.elasticsearch.index.shard.ShardId)5 InputFactory (io.crate.expression.InputFactory)3 CollectorContext (io.crate.expression.reference.doc.lucene.CollectorContext)3 LuceneQueryBuilder (io.crate.lucene.LuceneQueryBuilder)3 Reference (io.crate.metadata.Reference)3 QueryShardContext (org.elasticsearch.index.query.QueryShardContext)3 IntIndexedContainer (com.carrotsearch.hppc.IntIndexedContainer)2 IntCursor (com.carrotsearch.hppc.cursors.IntCursor)2 GroupProjection (io.crate.execution.dsl.projection.GroupProjection)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 Nullable (javax.annotation.Nullable)2 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)2 Index (org.elasticsearch.index.Index)2 IndexNotFoundException (org.elasticsearch.index.IndexNotFoundException)2 IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap)1 OrderBy (io.crate.analyze.OrderBy)1 RamAccounting (io.crate.breaker.RamAccounting)1