Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class ContinuousHiveSplitEnumerator, method assignSplits.
private void assignSplits() {
    final Iterator<Map.Entry<Integer, String>> awaitingReader =
            readersAwaitingSplit.entrySet().iterator();
    while (awaitingReader.hasNext()) {
        final Map.Entry<Integer, String> nextAwaiting = awaitingReader.next();
        // The requesting subtask's hostname is passed to the assigner for
        // locality-aware split selection.
        final String hostname = nextAwaiting.getValue();
        final int awaitingSubtask = nextAwaiting.getKey();
        final Optional<FileSourceSplit> nextSplit = splitAssigner.getNext(hostname);
        if (nextSplit.isPresent()) {
            enumeratorContext.assignSplit((HiveSourceSplit) nextSplit.get(), awaitingSubtask);
            awaitingReader.remove();
        } else {
            // No split is currently available; leave the remaining readers
            // waiting until new splits are discovered.
            break;
        }
    }
}
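For context, assignSplits is typically driven by split requests from the source readers. A minimal sketch of that wiring, assuming the enumerator keeps the readersAwaitingSplit map shown above (the exact method body in ContinuousHiveSplitEnumerator may differ):

    @Override
    public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) {
        // Remember which subtask is waiting (and on which host), then try
        // to serve it immediately from the split assigner.
        readersAwaitingSplit.put(subtaskId, requesterHostname);
        assignSplits();
    }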
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class AvroBulkFormatTest, method testRestoreReader.
@Test
public void testRestoreReader() throws IOException {
    AvroBulkFormatTestUtils.TestingAvroBulkFormat bulkFormat =
            new AvroBulkFormatTestUtils.TestingAvroBulkFormat();
    long splitLength = tmpFile.length() / 3;
    String splitId = UUID.randomUUID().toString();

    // First pass: read the split from scratch and record the offset of the
    // first batch.
    FileSourceSplit split =
            new FileSourceSplit(
                    splitId, new Path(tmpFile.toString()), splitLength * 2, tmpFile.length());
    BulkFormat.Reader<RowData> reader = bulkFormat.createReader(new Configuration(), split);
    long offset1 = assertBatch(reader, new BatchInfo(3, 5));
    assertBatch(reader, new BatchInfo(5, 6));
    assertThat(reader.readBatch()).isNull();
    reader.close();

    // Second pass: restore the reader at offset1 with 1 record already
    // emitted, and verify the remaining records are produced identically.
    split =
            new FileSourceSplit(
                    splitId,
                    new Path(tmpFile.toString()),
                    splitLength * 2,
                    tmpFile.length(),
                    StringUtils.EMPTY_STRING_ARRAY,
                    new CheckpointedPosition(offset1, 1));
    reader = bulkFormat.restoreReader(new Configuration(), split);
    long offset2 = assertBatch(reader, new BatchInfo(3, 5), 1);
    assertBatch(reader, new BatchInfo(5, 6));
    assertThat(reader.readBatch()).isNull();
    reader.close();

    assertThat(offset2).isEqualTo(offset1);
}
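As an aside, a split can also be re-stamped with a reader position via FileSourceSplit#updateWithCheckpointedPosition instead of being rebuilt field by field. A minimal sketch, reusing the split, offset1, and bulkFormat variables from the test above:

    // Copy the split, attaching the position to resume from: byte offset
    // offset1, with 1 record after that offset already emitted.
    FileSourceSplit restoredSplit =
            split.updateWithCheckpointedPosition(new CheckpointedPosition(offset1, 1));
    BulkFormat.Reader<RowData> restored =
            bulkFormat.restoreReader(new Configuration(), restoredSplit);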
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class AvroBulkFormatTest, method assertSplit.
private void assertSplit(
        AvroBulkFormatTestUtils.TestingAvroBulkFormat bulkFormat, List<SplitInfo> splitInfos)
        throws IOException {
    for (SplitInfo splitInfo : splitInfos) {
        FileSourceSplit split =
                new FileSourceSplit(
                        UUID.randomUUID().toString(),
                        new Path(tmpFile.toString()),
                        splitInfo.start,
                        splitInfo.end - splitInfo.start);
        BulkFormat.Reader<RowData> reader = bulkFormat.createReader(new Configuration(), split);
        List<Long> offsets = new ArrayList<>();
        for (BatchInfo batch : splitInfo.batches) {
            offsets.add(assertBatch(reader, batch));
        }
        assertThat(reader.readBatch()).isNull();
        // Offsets reported by consecutive batches must be strictly increasing.
        for (int j = 1; j < offsets.size(); j++) {
            assertThat(offsets.get(j - 1) < offsets.get(j)).isTrue();
        }
        reader.close();
    }
}
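The SplitInfo and BatchInfo helpers are not part of this excerpt. A plausible minimal shape for SplitInfo, inferred only from how its fields are used above (the real test class may differ):

    // Hypothetical reconstruction of the elided test helper.
    private static class SplitInfo {
        private final long start;              // first byte of the split
        private final long end;                // byte where the split ends
        private final List<BatchInfo> batches; // batches expected from this split

        private SplitInfo(long start, long end, List<BatchInfo> batches) {
            this.start = start;
            this.end = end;
            this.batches = batches;
        }
    }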
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class HiveSourceSplitSerializer, method serialize.
private void serialize(ObjectOutputStream outputStream, HiveSourceSplit split) throws IOException {
    // Serialize the FileSourceSplit part with the superclass serializer as a
    // length-prefixed byte block, then append the Hive-specific partition.
    byte[] superBytes =
            FileSourceSplitSerializer.INSTANCE.serialize(
                    new FileSourceSplit(
                            split.splitId(),
                            split.path(),
                            split.offset(),
                            split.length(),
                            split.fileModificationTime(),
                            split.fileSize(),
                            split.hostnames(),
                            split.getReaderPosition().orElse(null)));
    outputStream.writeInt(superBytes.length);
    outputStream.write(superBytes);
    outputStream.writeObject(split.getHiveTablePartition());
}
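A private helper like this is usually invoked from the serializer's public entry point, which owns the stream. A minimal sketch of that wrapper, with the stream wiring assumed rather than taken from the Flink source:

    public byte[] serialize(HiveSourceSplit split) throws IOException {
        try (ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
                ObjectOutputStream outputStream = new ObjectOutputStream(byteStream)) {
            serialize(outputStream, split); // the private helper shown above
            outputStream.flush();
            return byteStream.toByteArray();
        }
    }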
Use of org.apache.flink.connector.file.src.FileSourceSplit in project flink by apache.
The class HiveSourceSplitSerializer, method deserializeV1.
private HiveSourceSplit deserializeV1(ObjectInputStream inputStream) throws IOException {
    try {
        // Read back the length-prefixed FileSourceSplit block written by
        // serialize(), then the Hive partition object.
        int superLen = inputStream.readInt();
        byte[] superBytes = new byte[superLen];
        inputStream.readFully(superBytes);
        FileSourceSplit superSplit =
                FileSourceSplitSerializer.INSTANCE.deserialize(
                        FileSourceSplitSerializer.INSTANCE.getVersion(), superBytes);
        HiveTablePartition hiveTablePartition = (HiveTablePartition) inputStream.readObject();
        return new HiveSourceSplit(
                superSplit.splitId(),
                superSplit.path(),
                superSplit.offset(),
                superSplit.length(),
                superSplit.fileModificationTime(),
                superSplit.fileSize(),
                superSplit.hostnames(),
                superSplit.getReaderPosition().orElse(null),
                hiveTablePartition);
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed to deserialize HiveSourceSplit", e);
    }
}
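The V1 suffix implies a version dispatch in the serializer's public deserialize method, per Flink's SimpleVersionedSerializer contract. A hedged sketch of what that dispatch could look like (assumed shape, not the verbatim Flink code):

    @Override
    public HiveSourceSplit deserialize(int version, byte[] serialized) throws IOException {
        if (version == 1) {
            try (ObjectInputStream inputStream =
                    new ObjectInputStream(new ByteArrayInputStream(serialized))) {
                return deserializeV1(inputStream);
            }
        }
        throw new IOException("Unknown version: " + version);
    }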