Use of org.apache.flink.connectors.hive.read.HiveSourceSplit in project flink by apache.
Example: the serialize method of the ContinuousHivePendingSplitsCheckpointSerializer class.
@Override
public byte[] serialize(PendingSplitsCheckpoint<HiveSourceSplit> checkpoint) throws IOException {
    Preconditions.checkArgument(
            checkpoint.getClass() == ContinuousHivePendingSplitsCheckpoint.class,
            "Only supports %s",
            ContinuousHivePendingSplitsCheckpoint.class.getName());
    ContinuousHivePendingSplitsCheckpoint hiveCheckpoint =
            (ContinuousHivePendingSplitsCheckpoint) checkpoint;
    // Snapshot the base state (splits and already-processed paths) and serialize
    // it with the superclass serializer.
    PendingSplitsCheckpoint<HiveSourceSplit> superCP =
            PendingSplitsCheckpoint.fromCollectionSnapshot(
                    hiveCheckpoint.getSplits(), hiveCheckpoint.getAlreadyProcessedPaths());
    byte[] superBytes = superSerDe.serialize(superCP);
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    try (DataOutputViewStreamWrapper outputWrapper =
            new DataOutputViewStreamWrapper(byteArrayOutputStream)) {
        // Length-prefix the base checkpoint bytes so the read side can split them off.
        outputWrapper.writeInt(superBytes.length);
        outputWrapper.write(superBytes);
        // Append the Hive-specific state: the current read offset, then the
        // partitions seen since that offset.
        readOffsetSerDe.serialize(hiveCheckpoint.getCurrentReadOffset(), outputWrapper);
        outputWrapper.writeInt(hiveCheckpoint.getSeenPartitionsSinceOffset().size());
        for (List<String> partition : hiveCheckpoint.getSeenPartitionsSinceOffset()) {
            partitionSerDe.serialize(partition, outputWrapper);
        }
    }
    return byteArrayOutputStream.toByteArray();
}
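The matching entry point on the read side dispatches on the serialized version before decoding. A minimal sketch of that method, assuming the standard SimpleVersionedSerializer contract and the deserializeV1 helper shown later on this page:

@Override
public PendingSplitsCheckpoint<HiveSourceSplit> deserialize(int version, byte[] serialized)
        throws IOException {
    // Only format version 1 is assumed to exist in this sketch.
    if (version == 1) {
        try (DataInputViewStreamWrapper inputWrapper =
                new DataInputViewStreamWrapper(new ByteArrayInputStream(serialized))) {
            return deserializeV1(inputWrapper);
        }
    }
    throw new IOException("Unknown checkpoint version: " + version);
}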
Use of org.apache.flink.connectors.hive.read.HiveSourceSplit in project flink by apache.
Example: the deserializeV1 method of the HiveSourceSplitSerializer class.
private HiveSourceSplit deserializeV1(ObjectInputStream inputStream) throws IOException {
    try {
        // Read the length-prefixed bytes of the base FileSourceSplit and decode
        // them with the stock FileSourceSplit serializer.
        int superLen = inputStream.readInt();
        byte[] superBytes = new byte[superLen];
        inputStream.readFully(superBytes);
        FileSourceSplit superSplit = FileSourceSplitSerializer.INSTANCE.deserialize(
                FileSourceSplitSerializer.INSTANCE.getVersion(), superBytes);
        // The Hive partition is written with plain Java serialization.
        HiveTablePartition hiveTablePartition = (HiveTablePartition) inputStream.readObject();
        return new HiveSourceSplit(
                superSplit.splitId(), superSplit.path(), superSplit.offset(), superSplit.length(),
                superSplit.fileModificationTime(), superSplit.fileSize(), superSplit.hostnames(),
                superSplit.getReaderPosition().orElse(null), hiveTablePartition);
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed to deserialize HiveSourceSplit", e);
    }
}
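deserializeV1 receives an already-opened ObjectInputStream, so the public deserialize entry point is responsible for opening the stream and dispatching on the version. A hedged sketch, assuming the SimpleVersionedSerializer contract:

@Override
public HiveSourceSplit deserialize(int version, byte[] serialized) throws IOException {
    // Wrap the raw bytes in an ObjectInputStream and delegate by version.
    if (version == 1) {
        try (ObjectInputStream inputStream =
                new ObjectInputStream(new ByteArrayInputStream(serialized))) {
            return deserializeV1(inputStream);
        }
    }
    throw new IOException("Unknown split version: " + version);
}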
Use of org.apache.flink.connectors.hive.read.HiveSourceSplit in project flink by apache.
Example: the createInputSplits method of the HiveSourceFileEnumerator class.
public static List<HiveSourceSplit> createInputSplits(
        int minNumSplits,
        List<HiveTablePartition> partitions,
        ReadableConfig flinkConf,
        JobConf jobConf) throws IOException {
    List<HiveSourceSplit> hiveSplits = new ArrayList<>();
    // Compute MapReduce splits for all partitions, using the configured thread count.
    try (MRSplitsGetter splitsGetter = new MRSplitsGetter(
            flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_LOAD_PARTITION_SPLITS_THREAD_NUM))) {
        for (HiveTablePartitionSplits partitionSplits :
                splitsGetter.getHiveTablePartitionMRSplits(minNumSplits, partitions, jobConf)) {
            HiveTablePartition partition = partitionSplits.getHiveTablePartition();
            for (InputSplit inputSplit : partitionSplits.getInputSplits()) {
                Preconditions.checkState(
                        inputSplit instanceof FileSplit,
                        "Unsupported InputSplit type: " + inputSplit.getClass().getName());
                // Wrap each file-based MR split together with its Hive partition metadata.
                hiveSplits.add(new HiveSourceSplit((FileSplit) inputSplit, partition, null));
            }
        }
    }
    return hiveSplits;
}
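A minimal usage sketch of this enumerator method; the empty partition list is a stand-in, since resolving real HiveTablePartition objects from the metastore is outside this snippet:

// org.apache.flink.configuration.Configuration implements ReadableConfig, so the
// default value of the split-loading thread-count option is picked up here.
ReadableConfig flinkConf = new Configuration();
JobConf jobConf = new JobConf();
// Stand-in: real partitions would be resolved from the Hive metastore.
List<HiveTablePartition> partitions = Collections.emptyList();
List<HiveSourceSplit> splits =
        HiveSourceFileEnumerator.createInputSplits(1, partitions, flinkConf, jobConf);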
Use of org.apache.flink.connectors.hive.read.HiveSourceSplit in project flink by apache.
Example: the serialize method of the HiveSourceSplitSerializer class.
@Override
public byte[] serialize(HiveSourceSplit split) throws IOException {
    checkArgument(
            split.getClass() == HiveSourceSplit.class,
            "Cannot serialize subclasses of HiveSourceSplit");
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // Delegate the actual field writing to a private serialize(ObjectOutputStream, split) helper.
    try (ObjectOutputStream outputStream = new ObjectOutputStream(byteArrayOutputStream)) {
        serialize(outputStream, split);
    }
    return byteArrayOutputStream.toByteArray();
}
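The private serialize(ObjectOutputStream, HiveSourceSplit) helper is not shown on this page. A sketch of what it could look like, inferred from the field order read back by deserializeV1 above; getHiveTablePartition is assumed here as the accessor for the partition field:

private void serialize(ObjectOutputStream outputStream, HiveSourceSplit split) throws IOException {
    // Mirror deserializeV1: length-prefixed base-split bytes, then the partition object.
    byte[] superBytes = FileSourceSplitSerializer.INSTANCE.serialize(split);
    outputStream.writeInt(superBytes.length);
    outputStream.write(superBytes);
    outputStream.writeObject(split.getHiveTablePartition()); // accessor name assumed
}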
Use of org.apache.flink.connectors.hive.read.HiveSourceSplit in project flink by apache.
Example: the deserializeV1 method of the ContinuousHivePendingSplitsCheckpointSerializer class.
private PendingSplitsCheckpoint<HiveSourceSplit> deserializeV1(
        DataInputViewStreamWrapper inputWrapper) throws IOException {
    // Read back the length-prefixed base checkpoint written by serialize().
    byte[] superBytes = new byte[inputWrapper.readInt()];
    inputWrapper.readFully(superBytes);
    PendingSplitsCheckpoint<HiveSourceSplit> superCP =
            superSerDe.deserialize(superSerDe.getVersion(), superBytes);
    try {
        // Restore the Hive-specific state in the order it was written: the read
        // offset, then the partitions seen since that offset.
        Comparable<?> currentReadOffset = readOffsetSerDe.deserialize(inputWrapper);
        int numParts = inputWrapper.readInt();
        List<List<String>> parts = new ArrayList<>(numParts);
        for (int i = 0; i < numParts; i++) {
            parts.add(partitionSerDe.deserialize(inputWrapper));
        }
        return new ContinuousHivePendingSplitsCheckpoint(
                superCP.getSplits(), currentReadOffset, parts);
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed to deserialize " + getClass().getName(), e);
    }
}
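A round-trip sketch tying the two checkpoint methods together, assuming the serializer is constructed from a split serializer exposing a singleton INSTANCE (as FileSourceSplitSerializer does) and that checkpoint is an existing ContinuousHivePendingSplitsCheckpoint:

ContinuousHivePendingSplitsCheckpointSerializer serde =
        new ContinuousHivePendingSplitsCheckpointSerializer(HiveSourceSplitSerializer.INSTANCE);
byte[] bytes = serde.serialize(checkpoint); // 'checkpoint' assumed to exist
PendingSplitsCheckpoint<HiveSourceSplit> restored =
        serde.deserialize(serde.getVersion(), bytes);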