Search in sources :

Example 36 with BatchIterator

use of io.crate.data.BatchIterator in project crate by crate.

the class FileReadingIteratorTest method testIteratorContract_givenCSVInputFormat__AndNoRelevantFileExtension_thenWritesAsMap.

/**
 * Writes three lines of comma-separated text to a temp file created with the
 * suffix {@code .any-suffix}, then builds a batch iterator over that file using
 * the {@code CSV} input format and verifies it against the rows held in
 * {@code CSV_AS_MAP_FIRST_LINE} and {@code CSV_AS_MAP_SECOND_LINE}.
 */
@Test
public void testIteratorContract_givenCSVInputFormat__AndNoRelevantFileExtension_thenWritesAsMap() throws Exception {
    tempFilePath = createTempFile("tempfile", ".any-suffix");
    tmpFile = tempFilePath.toFile();

    String[] csvLines = { "name,id,age\n", "Arthur,4,38\n", "Trillian,5,33\n" };
    try (OutputStreamWriter csvOut = new OutputStreamWriter(new FileOutputStream(tmpFile), StandardCharsets.UTF_8)) {
        for (String csvLine : csvLines) {
            csvOut.write(csvLine);
        }
    }
    fileUri = tempFilePath.toUri().toString();

    // One single-column row per data line of the file.
    List<Object[]> expectedResult = Arrays.asList(
        new Object[] { CSV_AS_MAP_FIRST_LINE },
        new Object[] { CSV_AS_MAP_SECOND_LINE });

    // The tester receives a supplier rather than a single iterator instance.
    Supplier<BatchIterator<Row>> batchIteratorSupplier =
        () -> createBatchIterator(Collections.singletonList(fileUri), CSV);
    BatchIteratorTester tester = new BatchIteratorTester(batchIteratorSupplier);
    tester.verifyResultAndEdgeCaseBehaviour(expectedResult);
}
Also used : FileOutputStream(java.io.FileOutputStream) BatchIteratorTester(io.crate.testing.BatchIteratorTester) OutputStreamWriter(java.io.OutputStreamWriter) BatchIterator(io.crate.data.BatchIterator) Test(org.junit.Test)

Example 37 with BatchIterator

use of io.crate.data.BatchIterator in project crate by crate.

the class NestedLoopBatchIteratorsTest method testSemiJoinBatchedSource.

/**
 * Verifies a {@code SemiJoinNLBatchIterator} whose left and right inputs are
 * each wrapped in a {@code BatchSimulatingIterator}, expecting
 * {@code semiJoinResult}.
 */
@Test
public void testSemiJoinBatchedSource() throws Exception {
    Supplier<BatchIterator<Row>> batchIteratorSupplier = () -> {
        // Sources are created inside the supplier so every call builds new ones.
        BatchIterator<Row> leftSource =
            new BatchSimulatingIterator<>(TestingBatchIterators.range(0, 5), 2, 2, null);
        BatchIterator<Row> rightSource =
            new BatchSimulatingIterator<>(TestingBatchIterators.range(2, 6), 2, 2, null);
        return new SemiJoinNLBatchIterator<>(
            leftSource,
            rightSource,
            new CombinedRow(1, 1),
            getCol0EqCol1JoinCondition());
    };
    BatchIteratorTester tester = new BatchIteratorTester(batchIteratorSupplier);
    tester.verifyResultAndEdgeCaseBehaviour(semiJoinResult);
}
Also used : BatchIteratorTester(io.crate.testing.BatchIteratorTester) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) BatchIterator(io.crate.data.BatchIterator) Test(org.junit.Test)

Example 38 with BatchIterator

use of io.crate.data.BatchIterator in project crate by crate.

the class NestedLoopBatchIteratorsTest method testLeftJoin.

/**
 * Verifies a {@code LeftJoinNLBatchIterator} built over the ranges
 * {@code range(0, 4)} and {@code range(2, 6)}, expecting
 * {@code leftJoinResult}.
 */
@Test
public void testLeftJoin() throws Exception {
    Supplier<BatchIterator<Row>> batchIteratorSupplier = () -> {
        // Sources are created inside the supplier so every call builds new ones.
        BatchIterator<Row> leftSource = TestingBatchIterators.range(0, 4);
        BatchIterator<Row> rightSource = TestingBatchIterators.range(2, 6);
        return new LeftJoinNLBatchIterator<>(
            leftSource,
            rightSource,
            new CombinedRow(1, 1),
            getCol0EqCol1JoinCondition());
    };
    BatchIteratorTester tester = new BatchIteratorTester(batchIteratorSupplier);
    tester.verifyResultAndEdgeCaseBehaviour(leftJoinResult);
}
Also used : BatchIteratorTester(io.crate.testing.BatchIteratorTester) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) BatchIterator(io.crate.data.BatchIterator) Test(org.junit.Test)

Example 39 with BatchIterator

use of io.crate.data.BatchIterator in project crate by crate.

the class ShardCollectSource method getIterator.

/**
 * Creates the future batch iterator for the given collect phase on the local node.
 *
 * <p>Three paths are taken, in this order:
 * <ol>
 *   <li>SHARD granularity: an in-memory iterator over {@code getShardsIterator(...)}.</li>
 *   <li>DOC granularity with an ORDER BY: the multi-shard score-doc collector.</li>
 *   <li>Otherwise: one iterator per shard routed to this node, combined into an
 *       empty, single, async-composite or sequential-composite iterator depending
 *       on how many there are and whether the phase has shard projections.</li>
 * </ol>
 * In every path the resulting iterator is passed through {@code projectors.wrap}.
 *
 * @param txnCtx             not read by this method; the context from {@code collectTask} is used
 * @param phase              cast to {@link RoutedCollectPhase}
 * @param collectTask        task supplying transaction context, RAM accounting and memory manager
 * @param supportMoveToStart whether the caller needs moveToStart support; only requested from the
 *                           sources when the projectors do not provide independent scrolling
 * @return a future completing with the wrapped iterator
 */
@Override
public CompletableFuture<BatchIterator<Row>> getIterator(TransactionContext txnCtx, CollectPhase phase, CollectTask collectTask, boolean supportMoveToStart) {
    RoutedCollectPhase collectPhase = (RoutedCollectPhase) phase;
    String localNodeId = clusterService.localNode().getId();
    Projectors projectors = new Projectors(
        collectPhase.projections(),
        collectPhase.jobId(),
        collectTask.txnCtx(),
        collectTask.getRamAccounting(),
        collectTask.memoryManager(),
        sharedProjectorFactory);
    boolean requireMoveToStartSupport = supportMoveToStart && !projectors.providesIndependentScroll();

    if (collectPhase.maxRowGranularity() == RowGranularity.SHARD) {
        BatchIterator<Row> shardRows = InMemoryBatchIterator.of(
            getShardsIterator(collectTask.txnCtx(), collectPhase, localNodeId),
            SentinelRow.SENTINEL,
            true);
        return CompletableFuture.completedFuture(projectors.wrap(shardRows));
    }

    OrderBy orderBy = collectPhase.orderBy();
    if (collectPhase.maxRowGranularity() == RowGranularity.DOC && orderBy != null) {
        return createMultiShardScoreDocCollector(collectPhase, requireMoveToStartSupport, collectTask, localNodeId)
            .thenApply(projectors::wrap);
    }

    boolean hasShardProjections = Projections.hasAnyShardProjections(collectPhase.projections());
    Map<String, IntIndexedContainer> indexShards = collectPhase.routing().locations().get(localNodeId);

    // No routing entry for this node means there is nothing to collect locally.
    List<CompletableFuture<BatchIterator<Row>>> iterators;
    if (indexShards == null) {
        iterators = Collections.emptyList();
    } else {
        iterators = getIterators(collectTask, collectPhase, requireMoveToStartSupport, indexShards);
    }

    int iteratorCount = iterators.size();
    final CompletableFuture<BatchIterator<Row>> result;
    if (iteratorCount == 0) {
        result = CompletableFuture.completedFuture(InMemoryBatchIterator.empty(SentinelRow.SENTINEL));
    } else if (iteratorCount == 1) {
        result = iterators.get(0);
    } else if (hasShardProjections) {
        // use AsyncCompositeBatchIterator for multi-threaded loadNextBatch
        // in order to process shard-based projections concurrently
        // noinspection unchecked
        result = CompletableFutures.allAsList(iterators)
            .thenApply(its -> CompositeBatchIterator.asyncComposite(executor, availableThreads, its.toArray(new BatchIterator[0])));
    } else {
        // noinspection unchecked
        result = CompletableFutures.allAsList(iterators)
            .thenApply(its -> CompositeBatchIterator.seqComposite(its.toArray(new BatchIterator[0])));
    }
    return result.thenApply(projectors::wrap);
}
Also used : OrderBy(io.crate.analyze.OrderBy) ShardId(org.elasticsearch.index.shard.ShardId) IndexParts(io.crate.metadata.IndexParts) TransactionContext(io.crate.metadata.TransactionContext) Projections(io.crate.execution.dsl.projection.Projections) IntCursor(com.carrotsearch.hppc.cursors.IntCursor) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) OrderedDocCollector(io.crate.execution.engine.collect.collectors.OrderedDocCollector) EvaluatingNormalizer(io.crate.expression.eval.EvaluatingNormalizer) NodeLimits(io.crate.execution.jobs.NodeLimits) ShardNotFoundException(org.elasticsearch.index.shard.ShardNotFoundException) ProjectorFactory(io.crate.execution.engine.pipeline.ProjectorFactory) Settings(org.elasticsearch.common.settings.Settings) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) Map(java.util.Map) SharedShardContexts(io.crate.execution.jobs.SharedShardContexts) ThreadPool(org.elasticsearch.threadpool.ThreadPool) ThreadPools.numIdleThreads(io.crate.execution.support.ThreadPools.numIdleThreads) OrderingByPosition(io.crate.execution.engine.sort.OrderingByPosition) ShardRowContext(io.crate.expression.reference.sys.shard.ShardRowContext) DocSysColumns(io.crate.metadata.doc.DocSysColumns) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) NodeContext(io.crate.metadata.NodeContext) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) OrderedLuceneBatchIteratorFactory(io.crate.execution.engine.collect.collectors.OrderedLuceneBatchIteratorFactory) StaticTableReferenceResolver(io.crate.expression.reference.StaticTableReferenceResolver) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase) CompletableFutures(io.crate.concurrent.CompletableFutures) RowsTransformer(io.crate.execution.engine.collect.RowsTransformer) IllegalIndexShardStateException(org.elasticsearch.index.shard.IllegalIndexShardStateException) 
Iterables(io.crate.common.collections.Iterables) CollectTask(io.crate.execution.engine.collect.CollectTask) SysShardsTableInfo(io.crate.metadata.sys.SysShardsTableInfo) OrderByPositionVisitor(io.crate.planner.consumer.OrderByPositionVisitor) List(java.util.List) Exceptions(io.crate.exceptions.Exceptions) Logger(org.apache.logging.log4j.Logger) OrderBy(io.crate.analyze.OrderBy) Row(io.crate.data.Row) Singleton(org.elasticsearch.common.inject.Singleton) Projectors(io.crate.execution.engine.pipeline.Projectors) SharedShardContext(io.crate.execution.jobs.SharedShardContext) SentinelRow(io.crate.data.SentinelRow) MapBackedRefResolver(io.crate.metadata.MapBackedRefResolver) CompositeBatchIterator(io.crate.data.CompositeBatchIterator) RemoteCollectorFactory(io.crate.execution.engine.collect.RemoteCollectorFactory) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) ClusterService(org.elasticsearch.cluster.service.ClusterService) RowAccountingWithEstimators(io.crate.breaker.RowAccountingWithEstimators) CompletableFuture(java.util.concurrent.CompletableFuture) BatchIterator(io.crate.data.BatchIterator) Index(org.elasticsearch.index.Index) ShardRoutingState(org.elasticsearch.cluster.routing.ShardRoutingState) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Inject(org.elasticsearch.common.inject.Inject) CircuitBreakerService(org.elasticsearch.indices.breaker.CircuitBreakerService) Metadata(org.elasticsearch.cluster.metadata.Metadata) UnassignedShard(io.crate.metadata.shard.unassigned.UnassignedShard) Symbols(io.crate.expression.symbol.Symbols) CollectPhase(io.crate.execution.dsl.phases.CollectPhase) IndicesService(org.elasticsearch.indices.IndicesService) IntSupplier(java.util.function.IntSupplier) Nullable(javax.annotation.Nullable) EsExecutors(org.elasticsearch.common.util.concurrent.EsExecutors) IndexEventListener(org.elasticsearch.index.shard.IndexEventListener) Executor(java.util.concurrent.Executor) 
IndexService(org.elasticsearch.index.IndexService) IndexShard(org.elasticsearch.index.shard.IndexShard) DataType(io.crate.types.DataType) ProjectionToProjectorVisitor(io.crate.execution.engine.pipeline.ProjectionToProjectorVisitor) TransportActionProvider(io.crate.execution.TransportActionProvider) RowGranularity(io.crate.metadata.RowGranularity) ShardCollectorProvider(io.crate.execution.engine.collect.ShardCollectorProvider) Suppliers(io.crate.common.Suppliers) InputFactory(io.crate.expression.InputFactory) Collections(java.util.Collections) LogManager(org.apache.logging.log4j.LogManager) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) CompositeBatchIterator(io.crate.data.CompositeBatchIterator) BatchIterator(io.crate.data.BatchIterator) Projectors(io.crate.execution.engine.pipeline.Projectors) CompletableFuture(java.util.concurrent.CompletableFuture) Row(io.crate.data.Row) SentinelRow(io.crate.data.SentinelRow) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase)

Example 40 with BatchIterator

use of io.crate.data.BatchIterator in project crate by crate.

the class ShardCollectSource method getIterators.

/**
 * Collects one future iterator per shard listed in {@code indexShards}.
 *
 * <p>Indices that are missing from the cluster metadata or from the indices
 * service are skipped when {@code IndexParts.isPartitioned} reports the name as
 * partitioned; otherwise an {@link IndexNotFoundException} is thrown.
 *
 * @param collectTask    task passed on to the shard and remote collectors
 * @param collectPhase   phase describing what to collect
 * @param requiresScroll forwarded to {@code ShardCollectorProvider.getFutureIterator}
 * @param indexShards    index name mapped to the shard ids to collect from
 * @return the future iterators, in iteration order of {@code indexShards}
 */
private List<CompletableFuture<BatchIterator<Row>>> getIterators(CollectTask collectTask, RoutedCollectPhase collectPhase, boolean requiresScroll, Map<String, IntIndexedContainer> indexShards) {
    Metadata metadata = clusterService.state().metadata();
    List<CompletableFuture<BatchIterator<Row>>> futureIterators = new ArrayList<>();
    for (Map.Entry<String, IntIndexedContainer> indexEntry : indexShards.entrySet()) {
        String indexName = indexEntry.getKey();
        IndexMetadata indexMetadata = metadata.index(indexName);
        if (indexMetadata == null) {
            if (!IndexParts.isPartitioned(indexName)) {
                throw new IndexNotFoundException(indexName);
            }
            continue;
        }
        Index index = indexMetadata.getIndex();
        try {
            // Called only for its exception: the returned service is not used here.
            indicesService.indexServiceSafe(index);
        } catch (IndexNotFoundException e) {
            if (!IndexParts.isPartitioned(indexName)) {
                throw e;
            }
            continue;
        }
        for (IntCursor shardCursor : indexEntry.getValue()) {
            ShardId shardId = new ShardId(index, shardCursor.value);
            try {
                ShardCollectorProvider provider = getCollectorProviderSafe(shardId);
                futureIterators.add(provider.getFutureIterator(collectPhase, requiresScroll, collectTask));
            } catch (ShardNotFoundException | IllegalIndexShardStateException e) {
                // When FETCHID is among the collected columns the failure is rethrown
                // instead of falling back to a remote collector; per the original note,
                // the reader required in the fetchPhase would be missing.
                if (Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.FETCHID)) {
                    throw e;
                }
                futureIterators.add(remoteCollectorFactory.createCollector(shardId, collectPhase, collectTask, shardCollectorProviderFactory));
            } catch (IndexNotFoundException e) {
                // Prevent wrapping this to not break retry-detection
                throw e;
            } catch (Throwable t) {
                Exceptions.rethrowRuntimeException(t);
            }
        }
    }
    return futureIterators;
}
Also used : IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) ArrayList(java.util.ArrayList) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) Index(org.elasticsearch.index.Index) InMemoryBatchIterator(io.crate.data.InMemoryBatchIterator) CompositeBatchIterator(io.crate.data.CompositeBatchIterator) BatchIterator(io.crate.data.BatchIterator) IllegalIndexShardStateException(org.elasticsearch.index.shard.IllegalIndexShardStateException) ShardId(org.elasticsearch.index.shard.ShardId) CompletableFuture(java.util.concurrent.CompletableFuture) ShardNotFoundException(org.elasticsearch.index.shard.ShardNotFoundException) IntCursor(com.carrotsearch.hppc.cursors.IntCursor) ShardCollectorProvider(io.crate.execution.engine.collect.ShardCollectorProvider) IndexNotFoundException(org.elasticsearch.index.IndexNotFoundException) Row(io.crate.data.Row) SentinelRow(io.crate.data.SentinelRow) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

BatchIterator (io.crate.data.BatchIterator)50 Test (org.junit.Test)37 BatchIteratorTester (io.crate.testing.BatchIteratorTester)22 InMemoryBatchIterator (io.crate.data.InMemoryBatchIterator)17 Row (io.crate.data.Row)16 ArrayList (java.util.ArrayList)10 CrateUnitTest (io.crate.test.integration.CrateUnitTest)8 List (java.util.List)8 Map (java.util.Map)7 CompletableFuture (java.util.concurrent.CompletableFuture)7 Bucket (io.crate.data.Bucket)6 InputFactory (io.crate.expression.InputFactory)6 Symbol (io.crate.analyze.symbol.Symbol)4 RowAccounting (io.crate.breaker.RowAccounting)4 RowN (io.crate.data.RowN)4 CombinedRow (io.crate.data.join.CombinedRow)4 InputFactory (io.crate.operation.InputFactory)4 TestingHelpers.isRow (io.crate.testing.TestingHelpers.isRow)4 UUID (java.util.UUID)4 ClusterService (org.elasticsearch.cluster.service.ClusterService)4