Search in sources :

Example 1 with FileLineReferenceResolver

use of io.crate.expression.reference.file.FileLineReferenceResolver in project crate by crate.

the class CsvReaderBenchmark method measureFileReadingIteratorForCSV.

/**
 * Benchmark that reads {@code fileUri} through a {@link FileReadingIterator}
 * configured for the CSV input format and hands the first column of every
 * produced row to the JMH {@link Blackhole}.
 *
 * <p>Only the {@code _raw} column is requested from the iterator.
 * NOTE(review): the positional arguments {@code false, 1, 0} presumably are the
 * shared-storage flag, the number of readers and this reader's number —
 * confirm against {@code FileReadingIterator.newInstance}.
 *
 * @param blackhole sink that consumes the value read from each row
 */
@Benchmark()
public void measureFileReadingIteratorForCSV(Blackhole blackhole) {
    InputFactory.Context<LineCollectorExpression<?>> refCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    List<Input<?>> rowInputs =
        Collections.singletonList(refCtx.add(createReference("_raw", DataTypes.STRING)));

    BatchIterator<Row> rows = FileReadingIterator.newInstance(
        Collections.singletonList(fileUri),
        rowInputs,
        refCtx.expressions(),
        null,
        Map.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory()),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        CSV,
        Settings.EMPTY);

    while (rows.moveNext()) {
        Row current = rows.currentElement();
        blackhole.consume(current.get(0));
    }
}
Also used : LocalFsFileInputFactory(io.crate.execution.engine.collect.files.LocalFsFileInputFactory) InputFactory(io.crate.expression.InputFactory) Input(io.crate.data.Input) LineCollectorExpression(io.crate.execution.engine.collect.files.LineCollectorExpression) LocalFsFileInputFactory(io.crate.execution.engine.collect.files.LocalFsFileInputFactory) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) Reference(io.crate.metadata.Reference) Row(io.crate.data.Row) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Example 2 with FileLineReferenceResolver

use of io.crate.expression.reference.file.FileLineReferenceResolver in project crate by crate.

the class S3FileReadingCollectorTest method createBatchIterator.

/**
 * Builds a JSON-format {@link FileReadingIterator} over {@code fileUris} whose
 * S3 access is backed by Mockito mocks instead of a real client.
 *
 * <p>The mocked client lists a single object with key {@code "foo"} and serves
 * {@code s3InputStream} as the content of {@code fakebucket/foo}. The source
 * line column is always collected; the source-URI-failure column is collected
 * only on request, in which case its input is also stored in the
 * {@code sourceUriFailureInput} field.
 *
 * @param fileUris                URIs handed to the iterator
 * @param compression             compression argument passed through unchanged
 * @param s3InputStream           stream returned as the mocked object's content
 * @param collectSourceUriFailure whether to also collect the source-URI-failure column
 * @return the configured batch iterator
 */
private BatchIterator<Row> createBatchIterator(Collection<String> fileUris, String compression, final S3ObjectInputStream s3InputStream, boolean collectSourceUriFailure) {
    InputFactory.Context<LineCollectorExpression<?>> refCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);

    List<Input<?>> rowInputs = new ArrayList<>(2);
    rowInputs.add(refCtx.add(createReference(SourceLineExpression.COLUMN_NAME, DataTypes.STRING)));
    if (collectSourceUriFailure) {
        // noinspection unchecked
        sourceUriFailureInput = (Input<String>) refCtx.add(
            createReference(SourceUriFailureExpression.COLUMN_NAME, DataTypes.STRING));
        rowInputs.add(sourceUriFailureInput);
    }

    // Factory producing S3 file inputs whose client is fully mocked.
    FileInputFactory mockedS3Factory = new FileInputFactory() {

        @Override
        public FileInput create(URI uri, Settings withClauseOptions) throws IOException {
            S3ClientHelper mockedHelper = new S3ClientHelper() {

                @Override
                protected AmazonS3 initClient(String accessKey, String secretKey, String endpoint, String protocol) throws IOException {
                    AmazonS3 s3 = mock(AmazonS3Client.class);
                    ObjectListing listing = mock(ObjectListing.class);
                    S3ObjectSummary objectSummary = mock(S3ObjectSummary.class);
                    S3Object object = mock(S3Object.class);

                    // Listing yields exactly one summary with key "foo" and is never truncated.
                    when(s3.listObjects(anyString(), anyString())).thenReturn(listing);
                    when(listing.getObjectSummaries()).thenReturn(Collections.singletonList(objectSummary));
                    when(objectSummary.getKey()).thenReturn("foo");
                    // Fetching fakebucket/foo serves the caller-supplied stream.
                    when(s3.getObject("fakebucket", "foo")).thenReturn(object);
                    when(object.getObjectContent()).thenReturn(s3InputStream);
                    when(s3.listNextBatchOfObjects(any(ObjectListing.class))).thenReturn(listing);
                    when(listing.isTruncated()).thenReturn(false);
                    return s3;
                }
            };
            return new S3FileInput(mockedHelper, uri, "https");
        }
    };

    return FileReadingIterator.newInstance(
        fileUris,
        rowInputs,
        refCtx.expressions(),
        compression,
        Map.of(S3FileInputFactory.NAME, mockedS3Factory),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        FileUriCollectPhase.InputFormat.JSON,
        Settings.EMPTY);
}
Also used : FileInputFactory(io.crate.execution.engine.collect.files.FileInputFactory) InputFactory(io.crate.expression.InputFactory) AmazonS3(com.amazonaws.services.s3.AmazonS3) LineCollectorExpression(io.crate.execution.engine.collect.files.LineCollectorExpression) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) Reference(io.crate.metadata.Reference) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) IOException(java.io.IOException) URI(java.net.URI) Input(io.crate.data.Input) FileInput(io.crate.execution.engine.collect.files.FileInput) AmazonS3Client(com.amazonaws.services.s3.AmazonS3Client) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) S3ClientHelper(io.crate.copy.s3.common.S3ClientHelper) FileInputFactory(io.crate.execution.engine.collect.files.FileInputFactory) S3Object(com.amazonaws.services.s3.model.S3Object) Settings(org.elasticsearch.common.settings.Settings)

Example 3 with FileLineReferenceResolver

use of io.crate.expression.reference.file.FileLineReferenceResolver in project crate by crate.

the class FileReadingIteratorTest method createBatchIterator.

/**
 * Builds a {@link FileReadingIterator} over {@code fileUris} that reads from
 * the local file system and collects only the {@code _raw} column.
 *
 * <p>No compression is passed ({@code null}) and the default parser
 * properties and empty settings are used.
 *
 * @param fileUris URIs handed to the iterator
 * @param format   input format passed to the iterator
 * @return the configured batch iterator
 */
private BatchIterator<Row> createBatchIterator(Collection<String> fileUris, FileUriCollectPhase.InputFormat format) {
    Reference rawColumn = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> refCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    List<Input<?>> rowInputs = Collections.singletonList(refCtx.add(rawColumn));

    return FileReadingIterator.newInstance(
        fileUris,
        rowInputs,
        refCtx.expressions(),
        null,
        Map.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory()),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        format,
        Settings.EMPTY);
}
Also used : InputFactory(io.crate.expression.InputFactory) Input(io.crate.data.Input) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) Reference(io.crate.metadata.Reference)

Example 4 with FileLineReferenceResolver

use of io.crate.expression.reference.file.FileLineReferenceResolver in project crate by crate.

the class FileReadingCollectorTest method createBatchIterator.

/**
 * Builds a JSON-format {@link FileReadingIterator} over {@code fileUris} that
 * reads from the local file system.
 *
 * <p>The source line column is always collected. When
 * {@code collectSourceUriFailure} is set, the source-URI-failure column is
 * collected as well and its input is stored in the
 * {@code sourceUriFailureInput} field.
 *
 * @param fileUris                URIs handed to the iterator
 * @param compression             compression argument passed through unchanged
 * @param collectSourceUriFailure whether to also collect the source-URI-failure column
 * @return the configured batch iterator
 */
private BatchIterator<Row> createBatchIterator(Collection<String> fileUris, String compression, boolean collectSourceUriFailure) {
    InputFactory.Context<LineCollectorExpression<?>> refCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);

    List<Input<?>> rowInputs = new ArrayList<>(2);
    rowInputs.add(refCtx.add(createReference(SourceLineExpression.COLUMN_NAME, DataTypes.STRING)));
    if (collectSourceUriFailure) {
        // noinspection unchecked
        sourceUriFailureInput = (Input<String>) refCtx.add(
            createReference(SourceUriFailureExpression.COLUMN_NAME, DataTypes.STRING));
        rowInputs.add(sourceUriFailureInput);
    }

    return FileReadingIterator.newInstance(
        fileUris,
        rowInputs,
        refCtx.expressions(),
        compression,
        Map.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory()),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        FileUriCollectPhase.InputFormat.JSON,
        Settings.EMPTY);
}
Also used : InputFactory(io.crate.expression.InputFactory) Input(io.crate.data.Input) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) Reference(io.crate.metadata.Reference) ArrayList(java.util.ArrayList)

Example 5 with FileLineReferenceResolver

use of io.crate.expression.reference.file.FileLineReferenceResolver in project crate by crate.

the class FileCollectSource method getIterator.

/**
 * Creates a {@link FileReadingIterator} for the given collect phase and
 * returns it wrapped in an already-completed future.
 *
 * <p>{@code collectPhase} is cast to {@link FileUriCollectPhase} without a
 * prior type check. {@code collectTask} and {@code supportMoveToStart} are not
 * read by this method.
 *
 * @param txnCtx             transaction context used to build the input context and resolve the target URI
 * @param collectPhase       phase describing what to collect; must be a {@code FileUriCollectPhase}
 * @param collectTask        unused here
 * @param supportMoveToStart unused here
 * @return a completed future holding the file-reading batch iterator
 */
@Override
public CompletableFuture<BatchIterator<Row>> getIterator(TransactionContext txnCtx, CollectPhase collectPhase, CollectTask collectTask, boolean supportMoveToStart) {
    FileUriCollectPhase phase = (FileUriCollectPhase) collectPhase;

    InputFactory.Context<LineCollectorExpression<?>> refCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    refCtx.add(collectPhase.toCollect());

    List<String> uris = targetUriToStringList(txnCtx, nodeCtx, phase.targetUri());

    BatchIterator<Row> iterator = FileReadingIterator.newInstance(
        uris,
        refCtx.topLevelInputs(),
        refCtx.expressions(),
        phase.compression(),
        fileInputFactoryMap,
        phase.sharedStorage(),
        phase.nodeIds().size(),
        // This node's reader number, derived from the phase's node ids and the local node id.
        getReaderNumber(phase.nodeIds(), clusterService.state().nodes().getLocalNodeId()),
        phase.parserProperties(),
        phase.inputFormat(),
        phase.withClauseOptions());
    return CompletableFuture.completedFuture(iterator);
}
Also used : FileInputFactory(io.crate.execution.engine.collect.files.FileInputFactory) InputFactory(io.crate.expression.InputFactory) LineCollectorExpression(io.crate.execution.engine.collect.files.LineCollectorExpression) FileLineReferenceResolver(io.crate.expression.reference.file.FileLineReferenceResolver) FileUriCollectPhase(io.crate.execution.dsl.phases.FileUriCollectPhase)

Aggregations

InputFactory (io.crate.expression.InputFactory): 6; FileLineReferenceResolver (io.crate.expression.reference.file.FileLineReferenceResolver): 6; Input (io.crate.data.Input): 5; Reference (io.crate.metadata.Reference): 5; LineCollectorExpression (io.crate.execution.engine.collect.files.LineCollectorExpression): 4; TestingHelpers.createReference (io.crate.testing.TestingHelpers.createReference): 3; Row (io.crate.data.Row): 2; FileInputFactory (io.crate.execution.engine.collect.files.FileInputFactory): 2; LocalFsFileInputFactory (io.crate.execution.engine.collect.files.LocalFsFileInputFactory): 2; ArrayList (java.util.ArrayList): 2; Benchmark (org.openjdk.jmh.annotations.Benchmark): 2; AmazonS3 (com.amazonaws.services.s3.AmazonS3): 1; AmazonS3Client (com.amazonaws.services.s3.AmazonS3Client): 1; ObjectListing (com.amazonaws.services.s3.model.ObjectListing): 1; S3Object (com.amazonaws.services.s3.model.S3Object): 1; S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary): 1; S3ClientHelper (io.crate.copy.s3.common.S3ClientHelper): 1; FileUriCollectPhase (io.crate.execution.dsl.phases.FileUriCollectPhase): 1; FileInput (io.crate.execution.engine.collect.files.FileInput): 1; IOException (java.io.IOException): 1