use of io.crate.execution.engine.collect.files.LineCollectorExpression in project crate by crate.
the class CsvReaderBenchmark method measureFileReadingIteratorForCSV.
/**
 * Benchmarks a full pass over {@code fileUri} with a {@link FileReadingIterator}
 * configured for the CSV input format.
 *
 * <p>Only the {@code _raw} column is collected; the first cell of every row
 * is handed to the blackhole.
 *
 * @param blackhole JMH sink that consumes the value read from each row
 */
@Benchmark()
public void measureFileReadingIteratorForCSV(Blackhole blackhole) {
    Reference rawColumn = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> collectCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    List<Input<?>> rawInputs = Collections.singletonList(collectCtx.add(rawColumn));

    // NOTE(review): the literals false, 1, 0 are presumably sharedStorage,
    // number of readers and this reader's number — confirm against
    // FileReadingIterator.newInstance.
    BatchIterator<Row> rows = FileReadingIterator.newInstance(
        Collections.singletonList(fileUri),
        rawInputs,
        collectCtx.expressions(),
        null,
        Map.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory()),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        CSV,
        Settings.EMPTY
    );

    while (rows.moveNext()) {
        Row current = rows.currentElement();
        blackhole.consume(current.get(0));
    }
}
use of io.crate.execution.engine.collect.files.LineCollectorExpression in project crate by crate.
the class S3FileReadingCollectorTest method createBatchIterator.
/**
 * Builds a {@link FileReadingIterator} (JSON input format) whose S3 access
 * goes through a Mockito-mocked {@link AmazonS3} client.
 *
 * <p>The mocked client lists a single, non-truncated object summary with key
 * {@code "foo"} and returns {@code s3InputStream} as the content of the
 * object {@code fakebucket/foo}.
 *
 * @param fileUris                URIs handed to the iterator
 * @param compression             compression argument forwarded unchanged to the iterator
 * @param s3InputStream           stream returned as content of the mocked S3 object
 * @param collectSourceUriFailure when {@code true}, the source-uri-failure column is
 *                                collected as well and its input is stored in
 *                                {@code sourceUriFailureInput}
 * @return the iterator reading from the mocked S3 source
 */
private BatchIterator<Row> createBatchIterator(Collection<String> fileUris, String compression, final S3ObjectInputStream s3InputStream, boolean collectSourceUriFailure) {
    InputFactory.Context<LineCollectorExpression<?>> collectCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);

    List<Input<?>> collectedInputs = new ArrayList<>(2);
    Reference rawLine = createReference(SourceLineExpression.COLUMN_NAME, DataTypes.STRING);
    collectedInputs.add(collectCtx.add(rawLine));
    if (collectSourceUriFailure) {
        Reference uriFailure = createReference(SourceUriFailureExpression.COLUMN_NAME, DataTypes.STRING);
        // noinspection unchecked
        sourceUriFailureInput = (Input<String>) collectCtx.add(uriFailure);
        collectedInputs.add(sourceUriFailureInput);
    }

    // Factory that produces S3 file inputs backed by a mocked client instead of a real one.
    FileInputFactory mockedS3Factory = new FileInputFactory() {
        @Override
        public FileInput create(URI uri, Settings withClauseOptions) throws IOException {
            S3ClientHelper mockedHelper = new S3ClientHelper() {
                @Override
                protected AmazonS3 initClient(String accessKey, String secretKey, String endpoint, String protocol) throws IOException {
                    AmazonS3 s3 = mock(AmazonS3Client.class);
                    ObjectListing listing = mock(ObjectListing.class);
                    S3ObjectSummary objectSummary = mock(S3ObjectSummary.class);
                    S3Object object = mock(S3Object.class);

                    // Listing: exactly one summary with key "foo", never truncated.
                    when(s3.listObjects(anyString(), anyString())).thenReturn(listing);
                    when(listing.getObjectSummaries()).thenReturn(Collections.singletonList(objectSummary));
                    when(objectSummary.getKey()).thenReturn("foo");

                    // Fetching fakebucket/foo yields the caller-provided stream.
                    when(s3.getObject("fakebucket", "foo")).thenReturn(object);
                    when(object.getObjectContent()).thenReturn(s3InputStream);

                    when(s3.listNextBatchOfObjects(any(ObjectListing.class))).thenReturn(listing);
                    when(listing.isTruncated()).thenReturn(false);
                    return s3;
                }
            };
            return new S3FileInput(mockedHelper, uri, "https");
        }
    };

    return FileReadingIterator.newInstance(
        fileUris,
        collectedInputs,
        collectCtx.expressions(),
        compression,
        Map.of(S3FileInputFactory.NAME, mockedS3Factory),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        FileUriCollectPhase.InputFormat.JSON,
        Settings.EMPTY
    );
}
use of io.crate.execution.engine.collect.files.LineCollectorExpression in project crate by crate.
the class JsonReaderBenchmark method measureFileReadingIteratorForJson.
/**
 * Benchmarks a full pass over {@code fileUri} with a {@link FileReadingIterator}
 * configured for the JSON input format.
 *
 * <p>Only the {@code _raw} column is collected; the first cell of every row
 * is handed to the blackhole.
 *
 * @param blackhole JMH sink that consumes the value read from each row
 */
@Benchmark()
public void measureFileReadingIteratorForJson(Blackhole blackhole) {
    Reference rawColumn = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> collectCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    List<Input<?>> rawInputs = Collections.singletonList(collectCtx.add(rawColumn));

    // NOTE(review): the literals false, 1, 0 are presumably sharedStorage,
    // number of readers and this reader's number — confirm against
    // FileReadingIterator.newInstance.
    BatchIterator<Row> rows = FileReadingIterator.newInstance(
        Collections.singletonList(fileUri),
        rawInputs,
        collectCtx.expressions(),
        null,
        Map.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory()),
        false,
        1,
        0,
        CopyFromParserProperties.DEFAULT,
        JSON,
        Settings.EMPTY
    );

    while (rows.moveNext()) {
        Row current = rows.currentElement();
        blackhole.consume(current.get(0));
    }
}
use of io.crate.execution.engine.collect.files.LineCollectorExpression in project crate by crate.
the class FileLineReferenceResolver method getImplementation.
/**
 * Resolves the line expression used to collect the given reference.
 *
 * <p>If {@code EXPRESSION_BUILDER} holds a supplier registered under the
 * reference's column name, that supplier provides the expression. Otherwise a
 * {@link ColumnExtractingLineExpression} is created for the column and the
 * reference's value type.
 *
 * @param ref reference to resolve
 * @return the expression for {@code ref}
 */
public static LineCollectorExpression<?> getImplementation(Reference ref) {
    ColumnIdent column = ref.column();
    Supplier<LineCollectorExpression<?>> registered = EXPRESSION_BUILDER.get(column.name());
    if (registered != null) {
        return registered.get();
    }
    // No supplier registered under this name: extract the column generically.
    return new ColumnExtractingLineExpression(column, ref.valueType());
}
use of io.crate.execution.engine.collect.files.LineCollectorExpression in project crate by crate.
the class FileCollectSource method getIterator.
/**
 * Creates a {@link FileReadingIterator} for a {@link FileUriCollectPhase} and
 * returns it wrapped in an already completed future.
 *
 * <p>{@code collectTask} and {@code supportMoveToStart} are not used by this
 * implementation.
 *
 * @param txnCtx       transaction context used to build the input context and to
 *                     resolve the target URI into a list of file URIs
 * @param collectPhase phase to collect; cast to {@link FileUriCollectPhase}
 * @return completed future holding the iterator
 */
@Override
public CompletableFuture<BatchIterator<Row>> getIterator(TransactionContext txnCtx, CollectPhase collectPhase, CollectTask collectTask, boolean supportMoveToStart) {
    FileUriCollectPhase phase = (FileUriCollectPhase) collectPhase;

    InputFactory.Context<LineCollectorExpression<?>> collectCtx =
        inputFactory.ctxForRefs(txnCtx, FileLineReferenceResolver::getImplementation);
    collectCtx.add(collectPhase.toCollect());

    List<String> fileUris = targetUriToStringList(txnCtx, nodeCtx, phase.targetUri());

    BatchIterator<Row> iterator = FileReadingIterator.newInstance(
        fileUris,
        collectCtx.topLevelInputs(),
        collectCtx.expressions(),
        phase.compression(),
        fileInputFactoryMap,
        phase.sharedStorage(),
        // Number of nodes taking part in the phase.
        phase.nodeIds().size(),
        // Reader number derived from the phase's node ids and the local node id.
        getReaderNumber(phase.nodeIds(), clusterService.state().nodes().getLocalNodeId()),
        phase.parserProperties(),
        phase.inputFormat(),
        phase.withClauseOptions()
    );
    return CompletableFuture.completedFuture(iterator);
}
Aggregations