Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class StaticFileSplitEnumeratorTest, method testNoMoreSplits.
@Test
public void testNoMoreSplits() throws Exception {
    final TestingSplitEnumeratorContext<FileSourceSplit> context =
            new TestingSplitEnumeratorContext<>(4);
    final FileSourceSplit split = createRandomSplit();
    final StaticFileSplitEnumerator enumerator = createEnumerator(context, split);

    // first split assignment
    context.registerReader(1, "somehost");
    enumerator.addReader(1);
    enumerator.handleSplitRequest(1, "somehost");

    // second request: no more splits are available
    enumerator.handleSplitRequest(1, "somehost");

    assertThat(context.getSplitAssignments().get(1).getAssignedSplits(), contains(split));
    assertTrue(context.getSplitAssignments().get(1).hasReceivedNoMoreSplitsSignal());
}
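The helpers createRandomSplit and createEnumerator are defined elsewhere in the test class and are not shown on this page. A minimal sketch of what they could look like, assuming StaticFileSplitEnumerator is constructed from a SimpleSplitAssigner holding the pending splits (names, path, and sizes here are illustrative, not the verbatim Flink test code):

private static FileSourceSplit createRandomSplit() {
    // hypothetical helper: a split with a random id over an arbitrary 128-byte file
    return new FileSourceSplit(
            UUID.randomUUID().toString(), new Path("testfs:///dir/file"), 0L, 128L, 0L, 128L);
}

private static StaticFileSplitEnumerator createEnumerator(
        SplitEnumeratorContext<FileSourceSplit> context, FileSourceSplit... splits) {
    // assumption: the enumerator hands out splits from a SimpleSplitAssigner until it is drained
    return new StaticFileSplitEnumerator(context, new SimpleSplitAssigner(Arrays.asList(splits)));
}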
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class BlockSplittingRecursiveEnumeratorTest, method testFileWithMultipleBlocks.
@Test
@Override
public void testFileWithMultipleBlocks() throws Exception {
    final Path testPath = new Path("testfs:///dir/file");
    testFs = TestingFileSystem.createForFileStatus(
            "testfs",
            TestingFileSystem.TestFileStatus.forFileWithBlocks(
                    testPath,
                    1000L,
                    new TestingFileSystem.TestBlockLocation(0L, 100L, "host1", "host2"),
                    new TestingFileSystem.TestBlockLocation(100L, 520L, "host2", "host3"),
                    new TestingFileSystem.TestBlockLocation(620L, 380L, "host3", "host4")));
    testFs.register();

    final BlockSplittingRecursiveEnumerator enumerator = createEnumerator();
    final Collection<FileSourceSplit> splits =
            enumerator.enumerateSplits(new Path[] {new Path("testfs:///dir")}, 0);

    final Collection<FileSourceSplit> expected = Arrays.asList(
            new FileSourceSplit("ignoredId", testPath, 0L, 100L, 0, 1000L, "host1", "host2"),
            new FileSourceSplit("ignoredId", testPath, 100L, 520L, 0, 1000L, "host1", "host2"),
            new FileSourceSplit("ignoredId", testPath, 620L, 380L, 0, 1000L, "host1", "host2"));
    assertSplitsEqual(expected, splits);
}
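For illustration, the same enumerator can be pointed at any directory reachable through a Flink FileSystem. A hedged sketch (the input path and the parallelism hint are made up) that prints the block-aligned splits it produces:

// Sketch only: enumerate block-aligned splits for an assumed input directory.
FileEnumerator enumerator = new BlockSplittingRecursiveEnumerator();
Collection<FileSourceSplit> splits =
        enumerator.enumerateSplits(new Path[] {new Path("hdfs:///data/input")}, 4);
for (FileSourceSplit split : splits) {
    // each split covers one block of the underlying file
    System.out.println(split.path() + " offset=" + split.offset() + " length=" + split.length());
}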
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class LimitableBulkFormatTest, method test.
@Test
public void test() throws IOException {
    // read the file, capped at 22 records by LimitableBulkFormat
    BulkFormat<String, FileSourceSplit> format =
            LimitableBulkFormat.create(new StreamFormatAdapter<>(new TextLineInputFormat()), 22L);
    BulkFormat.Reader<String> reader = format.createReader(
            new Configuration(),
            new FileSourceSplit("id", new Path(file.toURI()), 0, file.length(), file.lastModified(), file.length()));

    AtomicInteger i = new AtomicInteger(0);
    Utils.forEachRemaining(reader, s -> i.incrementAndGet());
    Assert.assertEquals(22, i.get());
}
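The file field is prepared elsewhere in the test class. A hypothetical setup of the kind this test presumably relies on, writing more lines than the 22-record limit so that the cap, not the end of the file, is what stops the reader (TEMP_FOLDER and the line contents are assumptions):

@Before
public void prepareFile() throws IOException {
    // hypothetical: write 100 lines so the LimitableBulkFormat limit of 22 is what ends the read
    file = TEMP_FOLDER.newFile("test-lines.txt");
    try (PrintWriter writer = new PrintWriter(new FileWriter(file))) {
        for (int line = 0; line < 100; line++) {
            writer.println("line-" + line);
        }
    }
}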
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class FileSystemTableSource, method getScanRuntimeProvider.
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
    // When this table is partitioned but no partitions are found, just return an empty source.
    if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
        return InputFormatProvider.of(new CollectionInputFormat<>(new ArrayList<>(), null));
    }

    // Resolve metadata and make sure to filter out metadata not in the producedDataType
    final List<String> metadataKeys =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(((this.metadataKeys == null) ? Collections.emptyList() : this.metadataKeys)::contains)
                    .collect(Collectors.toList());
    final List<ReadableFileInfo> metadataToExtract =
            metadataKeys.stream().map(ReadableFileInfo::resolve).collect(Collectors.toList());

    // Filter out partition columns not in producedDataType
    final List<String> partitionKeysToExtract =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(this.partitionKeys::contains)
                    .collect(Collectors.toList());

    // Compute the physical projection and the physical data type, that is
    // the type without partition columns and metadata, in the same order as the schema
    DataType physicalDataType = physicalRowDataType;
    final Projection partitionKeysProjections =
            Projection.fromFieldNames(physicalDataType, partitionKeysToExtract);
    final Projection physicalProjections =
            (projectFields != null ? Projection.of(projectFields) : Projection.all(physicalDataType))
                    .difference(partitionKeysProjections);
    physicalDataType = partitionKeysProjections.complement(physicalDataType).project(physicalDataType);

    if (bulkReaderFormat != null) {
        if (bulkReaderFormat instanceof BulkDecodingFormat && filters != null && filters.size() > 0) {
            ((BulkDecodingFormat<RowData>) bulkReaderFormat).applyFilters(filters);
        }
        BulkFormat<RowData, FileSourceSplit> format;
        if (bulkReaderFormat instanceof ProjectableDecodingFormat) {
            format = ((ProjectableDecodingFormat<BulkFormat<RowData, FileSourceSplit>>) bulkReaderFormat)
                    .createRuntimeDecoder(
                            scanContext, physicalDataType, physicalProjections.toNestedIndexes());
        } else {
            format = new ProjectingBulkFormat(
                    bulkReaderFormat.createRuntimeDecoder(scanContext, physicalDataType),
                    physicalProjections.toTopLevelIndexes(),
                    scanContext.createTypeInformation(physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else if (deserializationFormat != null) {
        BulkFormat<RowData, FileSourceSplit> format;
        if (deserializationFormat instanceof ProjectableDecodingFormat) {
            format = new DeserializationSchemaAdapter(
                    ((ProjectableDecodingFormat<DeserializationSchema<RowData>>) deserializationFormat)
                            .createRuntimeDecoder(
                                    scanContext, physicalDataType, physicalProjections.toNestedIndexes()));
        } else {
            format = new ProjectingBulkFormat(
                    new DeserializationSchemaAdapter(
                            deserializationFormat.createRuntimeDecoder(scanContext, physicalDataType)),
                    physicalProjections.toTopLevelIndexes(),
                    scanContext.createTypeInformation(physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else {
        throw new TableException("Can not find format factory.");
    }
}
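The helper createSourceProvider is not shown here. A hedged sketch of its likely shape, assuming the provider wraps a FileSource built from the resolved BulkFormat and the table's paths (the paths() helper and the omitted limit/monitoring options are assumptions, not the verbatim Flink implementation):

private SourceProvider createSourceProvider(BulkFormat<RowData, FileSourceSplit> bulkFormat) {
    // assumption: read every file under the table's base path(s) with the resolved format
    FileSource<RowData> fileSource = FileSource.forBulkFileFormat(bulkFormat, paths()).build();
    return SourceProvider.of(fileSource);
}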
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class FileSystemTableSink, method createCompactReaderFactory.
private Optional<CompactReader.Factory<RowData>> createCompactReaderFactory(Context context) {
    // Compute producedDataType (including partition fields) and physicalDataType (excluding
    // partition fields)
    final DataType producedDataType = physicalRowDataType;
    final DataType physicalDataType =
            DataType.getFields(producedDataType).stream()
                    .filter(field -> !partitionKeys.contains(field.getName()))
                    .collect(Collectors.collectingAndThen(Collectors.toList(), DataTypes::ROW));

    if (bulkReaderFormat != null) {
        final BulkFormat<RowData, FileSourceSplit> format =
                new FileInfoExtractorBulkFormat(
                        bulkReaderFormat.createRuntimeDecoder(createSourceContext(context), physicalDataType),
                        producedDataType, context.createTypeInformation(producedDataType),
                        Collections.emptyMap(), partitionKeys, defaultPartName);
        return Optional.of(CompactBulkReader.factory(format));
    } else if (deserializationFormat != null) {
        final DeserializationSchema<RowData> decoder =
                deserializationFormat.createRuntimeDecoder(createSourceContext(context), physicalDataType);
        final BulkFormat<RowData, FileSourceSplit> format =
                new FileInfoExtractorBulkFormat(
                        new DeserializationSchemaAdapter(decoder),
                        producedDataType, context.createTypeInformation(producedDataType),
                        Collections.emptyMap(), partitionKeys, defaultPartName);
        return Optional.of(CompactBulkReader.factory(format));
    }
    return Optional.empty();
}
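The createSourceContext helper adapts the sink's Context so the decoding formats above can be instantiated outside of a real table scan; presumably it only needs to forward type-information creation. A sketch under that assumption (the exact set of methods on DynamicTableSource.Context depends on the Flink version, so treat this as illustrative):

private DynamicTableSource.Context createSourceContext(Context context) {
    return new DynamicTableSource.Context() {
        @Override
        public <T> TypeInformation<T> createTypeInformation(DataType producedDataType) {
            // forward to the sink context, which can create type information as well
            return context.createTypeInformation(producedDataType);
        }

        @Override
        public <T> TypeInformation<T> createTypeInformation(LogicalType producedLogicalType) {
            return context.createTypeInformation(producedLogicalType);
        }

        @Override
        public DynamicTableSource.DataStructureConverter createDataStructureConverter(DataType producedDataType) {
            // not needed when only BulkFormat readers are created for compaction
            throw new UnsupportedOperationException();
        }
    };
}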