Use of io.crate.execution.jobs.SharedShardContext in project crate by crate.
Class ShardCollectSource, method createMultiShardScoreDocCollector.
/**
 * Creates one ordered {@link BatchIterator} over all shards that the routing of
 * {@code collectPhase} assigns to {@code localNodeId}.
 *
 * <p>For every routed shard a future {@code OrderedDocCollector} is requested from the
 * shard's collector provider. Once all of these futures have completed, the collectors
 * are handed to {@code OrderedLuceneBatchIteratorFactory}, using the row ordering derived
 * from the phase's {@code orderBy()}.
 *
 * @param collectPhase       phase to collect for; its {@code orderBy()} must not be {@code null}
 * @param supportMoveToStart forwarded to the shard collectors and to the iterator factory
 * @param collectTask        provides the shared shard contexts and the RAM accounting
 * @param localNodeId        key into the routing locations selecting the shards to collect from
 * @return a future that completes with the combined iterator
 * @throws IndexNotFoundException if a routed index is missing and is not a partition index
 */
private CompletableFuture<BatchIterator<Row>> createMultiShardScoreDocCollector(RoutedCollectPhase collectPhase,
                                                                                boolean supportMoveToStart,
                                                                                CollectTask collectTask,
                                                                                String localNodeId) {
    Map<String, Map<String, IntIndexedContainer>> locations = collectPhase.routing().locations();
    SharedShardContexts sharedShardContexts = collectTask.sharedShardContexts();
    Map<String, IntIndexedContainer> indexShards = locations.get(localNodeId);
    List<CompletableFuture<OrderedDocCollector>> orderedDocCollectors = new ArrayList<>();
    Metadata metadata = clusterService.state().metadata();
    for (Map.Entry<String, IntIndexedContainer> entry : indexShards.entrySet()) {
        String indexName = entry.getKey();
        // The routed index may be absent from the current cluster metadata. Treat that
        // the same way as the IndexNotFoundException handling further down: skip
        // partition indices, fail explicitly for everything else instead of running
        // into a NullPointerException.
        var indexMetadata = metadata.index(indexName);
        if (indexMetadata == null) {
            if (IndexParts.isPartitioned(indexName)) {
                continue;
            }
            throw new IndexNotFoundException(indexName);
        }
        Index index = indexMetadata.getIndex();
        for (IntCursor shard : entry.getValue()) {
            ShardId shardId = new ShardId(index, shard.value);
            try {
                SharedShardContext context = sharedShardContexts.getOrCreateContext(shardId);
                ShardCollectorProvider shardCollectorProvider = getCollectorProviderSafe(shardId);
                orderedDocCollectors.add(
                    shardCollectorProvider.getFutureOrderedCollector(collectPhase, context, collectTask, supportMoveToStart));
            } catch (ShardNotFoundException | IllegalIndexShardStateException e) {
                // shard level failures are always propagated unchanged
                throw e;
            } catch (IndexNotFoundException e) {
                if (IndexParts.isPartitioned(indexName)) {
                    // the whole partition index is gone; skip its remaining shards
                    break;
                }
                throw e;
            }
        }
    }
    List<DataType<?>> columnTypes = Symbols.typeView(collectPhase.toCollect());
    OrderBy orderBy = collectPhase.orderBy();
    assert orderBy != null : "orderBy must not be null";
    return CompletableFutures.allAsList(orderedDocCollectors).thenApply(
        collectors -> OrderedLuceneBatchIteratorFactory.newInstance(
            collectors,
            OrderingByPosition.rowOrdering(
                OrderByPositionVisitor.orderByPositions(orderBy.orderBySymbols(), collectPhase.toCollect()),
                orderBy.reverseFlags(),
                orderBy.nullsFirst()),
            new RowAccountingWithEstimators(columnTypes, collectTask.getRamAccounting()),
            executor,
            availableThreads,
            supportMoveToStart));
}
Use of io.crate.execution.jobs.SharedShardContext in project crate by crate.
Class FetchTask, method start.
/**
 * Starts the fetch task.
 *
 * <p>While holding the lock on {@code jobId} this either completes {@code result}
 * exceptionally (if the task has already been killed) or walks all routings for the local
 * node and, for every index that has a reader base, registers the relation under that
 * base, creates the missing shard contexts and acquires searchers for the tables that
 * have fetch references. Finally the fetch references are grouped per relation in
 * {@code toFetch}.
 *
 * <p>If no searcher was acquired or the phase has no fetch references, the task is closed
 * right away.
 *
 * @throws IndexNotFoundException if a routed index that is not a partition index is missing
 */
@Override
public void start() {
    synchronized (jobId) {
        if (killed != null) {
            result.completeExceptionally(killed);
            return;
        }

        // reverse lookup: index name -> relation owning that index
        HashMap<String, RelationName> relationByIndex = new HashMap<>();
        phase.tableIndices().forEach((relation, indices) -> {
            for (String indexName : indices) {
                relationByIndex.put(indexName, relation);
            }
        });

        Set<RelationName> relationsWithFetchRefs = new HashSet<>();
        for (Reference fetchRef : phase.fetchRefs()) {
            relationsWithFetchRefs.add(fetchRef.ident().tableIdent());
        }

        String searcherSource = "fetch-task: " + jobId.toString() + '-' + phase.phaseId() + '-' + phase.name();
        for (Routing routing : routingIterable) {
            Map<String, IntIndexedContainer> localShards = routing.locations().get(localNodeId);
            for (Map.Entry<String, IntIndexedContainer> shardsOfIndex : localShards.entrySet()) {
                String indexName = shardsOfIndex.getKey();
                Integer base = phase.bases().get(indexName);
                if (base == null) {
                    continue;
                }
                IndexMetadata indexMetadata = metadata.index(indexName);
                if (indexMetadata == null) {
                    // only partition indices are allowed to be missing
                    if (!IndexParts.isPartitioned(indexName)) {
                        throw new IndexNotFoundException(indexName);
                    }
                    continue;
                }
                Index index = indexMetadata.getIndex();
                RelationName relation = relationByIndex.get(indexName);
                assert relation != null : "no relationName found for index " + indexName;
                tableIdents.put(base, relation);
                toFetch.put(relation, new ArrayList<>());
                boolean needsSearcher = relationsWithFetchRefs.contains(relation);

                for (IntCursor shard : shardsOfIndex.getValue()) {
                    ShardId shardId = new ShardId(index, shard.value);
                    int readerId = base + shardId.id();
                    if (shardContexts.get(readerId) != null) {
                        // a context for this reader is already registered
                        continue;
                    }
                    try {
                        SharedShardContext created = sharedShardContexts.createContext(shardId, readerId);
                        shardContexts.put(readerId, created);
                        if (needsSearcher) {
                            searchers.put(readerId, created.acquireSearcher(searcherSource));
                        }
                    } catch (IndexNotFoundException e) {
                        // a missing partition index is tolerated, anything else is an error
                        if (!IndexParts.isPartitioned(indexName)) {
                            throw e;
                        }
                    }
                }
            }
        }

        // group the fetch references by relation; relations without an entry are ignored
        for (Reference fetchRef : phase.fetchRefs()) {
            Collection<Reference> refsOfRelation = toFetch.get(fetchRef.ident().tableIdent());
            if (refsOfRelation != null) {
                refsOfRelation.add(fetchRef);
            }
        }
    }
    if (searchers.isEmpty() || phase.fetchRefs().isEmpty()) {
        // Without fetch references there will not be any fetch requests. In that case
        // this context only exists so that the collectors can generate doc ids with the
        // right bases; the bases are fetched in the prepare phase, so it can be closed.
        close();
    }
}
Use of io.crate.execution.jobs.SharedShardContext in project crate by crate.
Class GroupByOptimizedIterator, method tryOptimizeSingleStringKey.
/**
 * Tries to create the optimized group-by iterator for a collect phase whose shard
 * projections contain a group projection on a single string key.
 *
 * <p>{@code null} is returned, without acquiring a searcher from the shared shard
 * context, when any of the following holds:
 * <ul>
 *   <li>{@code getSingleStringKeyGroupProjection} finds no matching projection</li>
 *   <li>the group key cannot be resolved to a reference</li>
 *   <li>the key column has no field type or the field type has no doc values</li>
 *   <li>{@code _score} is part of the collected symbols or of the where clause</li>
 *   <li>{@code hasHighCardinalityRatio} reports {@code true} for the key field</li>
 * </ul>
 *
 * <p>Otherwise a searcher is acquired through the shard's {@link SharedShardContext},
 * registered on {@code collectTask} and used to build the iterator.
 *
 * @return the optimized iterator, or {@code null} if the optimization is not applicable
 */
@Nullable
static BatchIterator<Row> tryOptimizeSingleStringKey(IndexShard indexShard,
                                                     DocTableInfo table,
                                                     LuceneQueryBuilder luceneQueryBuilder,
                                                     FieldTypeLookup fieldTypeLookup,
                                                     BigArrays bigArrays,
                                                     InputFactory inputFactory,
                                                     DocInputFactory docInputFactory,
                                                     RoutedCollectPhase collectPhase,
                                                     CollectTask collectTask) {
    Collection<? extends Projection> projectionsOnShard = shardProjections(collectPhase.projections());
    GroupProjection groupProjection = getSingleStringKeyGroupProjection(projectionsOnShard);
    if (groupProjection == null) {
        return null;
    }
    assert groupProjection.keys().size() == 1 : "Must have 1 key if getSingleStringKeyGroupProjection returned a projection";

    Reference docKeyRef = getKeyRef(collectPhase.toCollect(), groupProjection.keys().get(0));
    if (docKeyRef == null) {
        // the group key is not a reference
        return null;
    }
    Reference keyRef = (Reference) DocReferences.inverseSourceLookup(docKeyRef);
    MappedFieldType keyFieldType = fieldTypeLookup.get(keyRef.column().fqn());
    boolean keyHasDocValues = keyFieldType != null && keyFieldType.hasDocValues();
    if (!keyHasDocValues) {
        return null;
    }

    // not supporting _score keeps the optimized implementation a bit simpler
    boolean scoreInvolved = Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE)
        || Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE);
    if (scoreInvolved) {
        return null;
    }
    if (hasHighCardinalityRatio(() -> indexShard.acquireSearcher("group-by-cardinality-check"), keyFieldType.name())) {
        return null;
    }

    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(indexShard.shardId());
    var searcher = shardContext.acquireSearcher("group-by-ordinals:" + formatSource(collectPhase));
    // hand the searcher over to the task under the context's reader id
    collectTask.addSearcher(shardContext.readerId(), searcher);
    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();

    InputFactory.Context<? extends LuceneCollectorExpression<?>> docCtx = docInputFactory.getCtx(collectTask.txnCtx());
    docCtx.add(collectPhase.toCollect().stream()::iterator);
    InputFactory.Context<CollectExpression<Row, ?>> aggregationCtx = inputFactory.ctxForAggregations(collectTask.txnCtx());
    aggregationCtx.add(groupProjection.values());
    List<CollectExpression<Row, ?>> aggExpressions = aggregationCtx.expressions();
    List<AggregationContext> aggregations = aggregationCtx.aggregations();
    List<? extends LuceneCollectorExpression<?>> docExpressions = docCtx.expressions();

    RamAccounting ramAccounting = collectTask.getRamAccounting();
    CollectorContext collectorContext = new CollectorContext(shardContext.readerId());
    InputRow inputRow = new InputRow(docCtx.topLevelInputs());
    LuceneQueryBuilder.Context queryContext = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        indexShard.mapperService(),
        indexShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());

    return getIterator(
        bigArrays,
        searcher.item(),
        keyRef.column().fqn(),
        aggregations,
        docExpressions,
        aggExpressions,
        ramAccounting,
        collectTask.memoryManager(),
        collectTask.minNodeVersion(),
        inputRow,
        queryContext.query(),
        collectorContext,
        groupProjection.mode());
}
Use of io.crate.execution.jobs.SharedShardContext in project crate by crate.
Class LuceneShardCollectorProvider, method getUnorderedIterator.
/**
 * Creates an unordered iterator over the documents of this provider's shard that match
 * the where clause of {@code collectPhase}.
 *
 * <p>A searcher is acquired through the shard's {@link SharedShardContext} and registered
 * on {@code collectTask} before anything else happens. If the shard obtained from the
 * context has no mapper service, an empty iterator is returned instead of a Lucene one.
 *
 * @param collectPhase   phase providing the where clause and the symbols to collect
 * @param requiresScroll not read by this implementation
 * @param collectTask    task providing the shared shard contexts and the transaction context
 * @return a {@code LuceneBatchIterator}, or an empty iterator if the shard has no mapper service
 */
@Override
protected BatchIterator<Row> getUnorderedIterator(RoutedCollectPhase collectPhase,
                                                  boolean requiresScroll,
                                                  CollectTask collectTask) {
    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(indexShard.shardId());
    var searcher = shardContext.acquireSearcher("unordered-iterator: " + formatSource(collectPhase));
    collectTask.addSearcher(shardContext.readerId(), searcher);

    IndexShard contextShard = shardContext.indexShard();
    if (contextShard.mapperService() == null) {
        // A closed shard has no mapper service and cannot be queried with Lucene,
        // so there is nothing to collect from it.
        return InMemoryBatchIterator.empty(SentinelRow.SENTINEL);
    }

    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();
    LuceneQueryBuilder.Context queryContext = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        contextShard.mapperService(),
        contextShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());
    InputFactory.Context<? extends LuceneCollectorExpression<?>> docCtx =
        docInputFactory.extractImplementations(collectTask.txnCtx(), collectPhase);

    boolean collectsScore = Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE);
    return new LuceneBatchIterator(
        searcher.item(),
        queryContext.query(),
        queryContext.minScore(),
        collectsScore,
        new CollectorContext(shardContext.readerId()),
        docCtx.topLevelInputs(),
        docCtx.expressions());
}
Use of io.crate.execution.jobs.SharedShardContext in project crate by crate.
Class DocValuesGroupByOptimizedIterator, method tryOptimize.
/**
 * Tries to create a group-by iterator that aggregates directly on doc values.
 *
 * <p>{@code null} is returned, without acquiring a searcher, when any of the following
 * holds:
 * <ul>
 *   <li>{@code _score} is part of the collected symbols or of the where clause</li>
 *   <li>{@code getSinglePartialGroupProjection} finds no matching shard projection</li>
 *   <li>one of the group keys cannot be resolved to a reference</li>
 *   <li>one of the key columns has no field type or its field type has no doc values</li>
 *   <li>{@code DocValuesAggregates.createAggregators} returns {@code null}</li>
 * </ul>
 *
 * <p>Otherwise a searcher is acquired through the shard's {@link SharedShardContext},
 * registered on {@code collectTask}, and a {@code GroupByIterator} for one or for many
 * keys is created.
 *
 * @return the optimized iterator, or {@code null} if the optimization is not applicable
 */
@Nullable
static BatchIterator<Row> tryOptimize(Functions functions,
                                      IndexShard indexShard,
                                      DocTableInfo table,
                                      LuceneQueryBuilder luceneQueryBuilder,
                                      FieldTypeLookup fieldTypeLookup,
                                      DocInputFactory docInputFactory,
                                      RoutedCollectPhase collectPhase,
                                      CollectTask collectTask) {
    boolean scoreInvolved = Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE)
        || Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE);
    if (scoreInvolved) {
        return null;
    }

    Collection<? extends Projection> projectionsOnShard = shardProjections(collectPhase.projections());
    GroupProjection groupProjection = getSinglePartialGroupProjection(projectionsOnShard);
    if (groupProjection == null) {
        return null;
    }

    // resolve every group key to its column reference; bail out on the first unsupported key
    ArrayList<Reference> keyRefs = new ArrayList<>(groupProjection.keys().size());
    for (var groupKey : groupProjection.keys()) {
        var docKeyRef = getKeyRef(collectPhase.toCollect(), groupKey);
        if (docKeyRef == null) {
            // the group key is not a reference
            return null;
        }
        var columnKeyRef = (Reference) DocReferences.inverseSourceLookup(docKeyRef);
        var keyFieldType = fieldTypeLookup.get(columnKeyRef.column().fqn());
        boolean keyHasDocValues = keyFieldType != null && keyFieldType.hasDocValues();
        if (!keyHasDocValues) {
            return null;
        }
        keyRefs.add(columnKeyRef);
    }

    // noinspection rawtypes
    List<DocValueAggregator> aggregators = DocValuesAggregates.createAggregators(
        functions,
        groupProjection.values(),
        collectPhase.toCollect(),
        collectTask.txnCtx().sessionSettings().searchPath(),
        table);
    if (aggregators == null) {
        return null;
    }

    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(indexShard.shardId());
    var searcher = shardContext.acquireSearcher("group-by-doc-value-aggregates: " + formatSource(collectPhase));
    // hand the searcher over to the task under the context's reader id
    collectTask.addSearcher(shardContext.readerId(), searcher);
    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();

    InputFactory.Context<? extends LuceneCollectorExpression<?>> docCtx = docInputFactory.getCtx(collectTask.txnCtx());
    List<LuceneCollectorExpression<?>> keyExpressions = new ArrayList<>();
    for (var columnKeyRef : keyRefs) {
        keyExpressions.add((LuceneCollectorExpression<?>) docCtx.add(columnKeyRef));
    }

    LuceneQueryBuilder.Context queryContext = luceneQueryBuilder.convert(
        collectPhase.where(),
        collectTask.txnCtx(),
        indexShard.mapperService(),
        indexShard.shardId().getIndexName(),
        queryShardContext,
        table,
        shardContext.indexService().cache());

    if (keyRefs.size() == 1) {
        return GroupByIterator.forSingleKey(
            aggregators,
            searcher.item(),
            keyRefs.get(0),
            keyExpressions,
            collectTask.getRamAccounting(),
            collectTask.memoryManager(),
            collectTask.minNodeVersion(),
            queryContext.query(),
            new CollectorContext(shardContext.readerId()));
    }
    return GroupByIterator.forManyKeys(
        aggregators,
        searcher.item(),
        keyRefs,
        keyExpressions,
        collectTask.getRamAccounting(),
        collectTask.memoryManager(),
        collectTask.minNodeVersion(),
        queryContext.query(),
        new CollectorContext(shardContext.readerId()));
}
Aggregations