Use of org.apache.iceberg.FileScanTask in project Drill by Apache.
The class IcebergBlockMapBuilder, method getEndpointByteMap.
/**
 * For a given CombinedScanTask, calculate how many bytes are available on each Drillbit endpoint.
 *
 * @param scanTask the CombinedScanTask to calculate endpoint bytes for
 * @return a map from each Drillbit endpoint to the number of bytes available locally
 */
public EndpointByteMap getEndpointByteMap(CombinedScanTask scanTask) throws IOException {
  EndpointByteMapImpl endpointByteMap = new EndpointByteMapImpl();
  for (FileScanTask work : scanTask.files()) {
    RangeMap<Long, BlockLocation> blockMap = getBlockMap(work);
    long start = work.start();
    long end = start + work.length();
    Range<Long> scanTaskRange = Range.closedOpen(start, end);
    // Find the sub-map of block ranges that intersect with the scan task
    RangeMap<Long, BlockLocation> subRangeMap = blockMap.subRangeMap(scanTaskRange);
    // Iterate through each block in this sub-map and get the hosts for the block location
    for (Entry<Range<Long>, BlockLocation> block : subRangeMap.asMapOfRanges().entrySet()) {
      Range<Long> intersection = scanTaskRange.intersection(block.getKey());
      long bytes = intersection.upperEndpoint() - intersection.lowerEndpoint();
      // For each host in the current block location, add the intersecting bytes to the corresponding endpoint
      for (String host : block.getValue().getHosts()) {
        DrillbitEndpoint endpoint = endPointMap.get(host);
        if (endpoint != null) {
          endpointByteMap.add(endpoint, bytes);
        }
      }
    }
    logger.debug("FileScanTask group ({},{}) max bytes {}", work.file().path(), work.start(), endpointByteMap.getMaxBytes());
  }
  return endpointByteMap;
}
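The heart of this method is Guava's RangeMap: block locations are keyed by their byte ranges, and subRangeMap narrows the map to the portion a scan task actually covers (truncating each block range to the intersection, which is why the explicit intersection() call above is belt-and-braces). A minimal, self-contained sketch of that arithmetic, using hypothetical host names and block boundaries in place of Drill's real block map:

import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;

import java.util.HashMap;
import java.util.Map;

public class RangeIntersectionSketch {
  public static void main(String[] args) {
    // Hypothetical block layout: two 128-byte blocks hosted on different nodes
    RangeMap<Long, String> blockMap = TreeRangeMap.create();
    blockMap.put(Range.closedOpen(0L, 128L), "host-a");
    blockMap.put(Range.closedOpen(128L, 256L), "host-b");

    // A scan task covering bytes [100, 200) straddles both blocks
    Range<Long> scanTaskRange = Range.closedOpen(100L, 200L);

    Map<String, Long> bytesPerHost = new HashMap<>();
    blockMap.subRangeMap(scanTaskRange).asMapOfRanges().forEach((range, host) -> {
      // subRangeMap already clips each block range to the scan task,
      // so the intersection width is simply upper - lower
      long bytes = range.upperEndpoint() - range.lowerEndpoint();
      bytesPerHost.merge(host, bytes, Long::sum);
    });

    System.out.println(bytesPerHost); // e.g. {host-a=28, host-b=72}
  }
}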
Use of org.apache.iceberg.FileScanTask in project Presto by prestodb.
The class PartitionTable, method getPartitions.
private Map<StructLikeWrapper, Partition> getPartitions(TableScan tableScan) {
  try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
    Map<StructLikeWrapper, Partition> partitions = new HashMap<>();
    for (FileScanTask fileScanTask : fileScanTasks) {
      DataFile dataFile = fileScanTask.file();
      Types.StructType structType = fileScanTask.spec().partitionType();
      StructLike partitionStruct = dataFile.partition();
      StructLikeWrapper partitionWrapper = StructLikeWrapper.forType(structType).set(partitionStruct);
      if (!partitions.containsKey(partitionWrapper)) {
        Partition partition = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, partitionStruct, dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(dataFile.lowerBounds()), toMap(dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
        partitions.put(partitionWrapper, partition);
        continue;
      }
      Partition partition = partitions.get(partitionWrapper);
      partition.incrementFileCount();
      partition.incrementRecordCount(dataFile.recordCount());
      partition.incrementSize(dataFile.fileSizeInBytes());
      partition.updateMin(toMap(dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
      partition.updateMax(toMap(dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
      partition.updateNullCount(dataFile.nullValueCounts());
    }
    return partitions;
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
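The containsKey/continue dance above, insert a fresh Partition on first sight, otherwise fold the new file's stats into the existing one, is a classic first-seen-vs-aggregate pattern. A hedged sketch of the same pattern expressed with Map.merge, using a simplified stand-in for Presto's Partition class (the PartitionStats type and the hard-coded file data below are hypothetical, purely for illustration):

import java.util.HashMap;
import java.util.Map;

public class PartitionAggregationSketch {
  // Simplified stand-in for Presto's Partition: just file count, rows, and bytes
  static final class PartitionStats {
    long files, records, bytes;
    PartitionStats(long files, long records, long bytes) {
      this.files = files;
      this.records = records;
      this.bytes = bytes;
    }
    PartitionStats merge(PartitionStats other) {
      files += other.files;
      records += other.records;
      bytes += other.bytes;
      return this;
    }
    @Override
    public String toString() {
      return files + " files, " + records + " records, " + bytes + " bytes";
    }
  }

  public static void main(String[] args) {
    Map<String, PartitionStats> partitions = new HashMap<>();
    // Each row mimics one DataFile: record count and file size
    String[] partitionKeys = {"dt=2024-01-01", "dt=2024-01-01", "dt=2024-01-02"};
    long[][] dataFiles = {{10, 1_000}, {5, 500}, {20, 2_000}};

    for (int i = 0; i < dataFiles.length; i++) {
      // merge() inserts on first sight and folds in the stats otherwise,
      // replacing the explicit containsKey branch
      partitions.merge(partitionKeys[i],
          new PartitionStats(1, dataFiles[i][0], dataFiles[i][1]),
          PartitionStats::merge);
    }
    partitions.forEach((k, v) -> System.out.println(k + " -> " + v));
  }
}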
Use of org.apache.iceberg.FileScanTask in project Presto by prestodb.
The class IcebergSplitSource, method getNextBatch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
  // TODO: move this to a background thread
  List<ConnectorSplit> splits = new ArrayList<>();
  Iterator<FileScanTask> iterator = limit(fileScanIterator, maxSize);
  while (iterator.hasNext()) {
    FileScanTask task = iterator.next();
    splits.add(toIcebergSplit(task));
  }
  return completedFuture(new ConnectorSplitBatch(splits, isFinished()));
}
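The limit call here is presumably Guava's Iterators.limit, given the static import. It does not copy or split the underlying iterator; it is a view that stops after maxSize elements, so each call to getNextBatch resumes the shared fileScanIterator exactly where the previous batch left off. A small sketch of that resumption behavior, with an integer iterator standing in for the file scan tasks:

import com.google.common.collect.Iterators;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class BatchingSketch {
  public static void main(String[] args) {
    Iterator<Integer> shared = List.of(1, 2, 3, 4, 5).iterator();

    // Each limited view drains at most 2 elements from the shared iterator
    System.out.println(drain(Iterators.limit(shared, 2))); // [1, 2]
    System.out.println(drain(Iterators.limit(shared, 2))); // [3, 4]
    System.out.println(drain(Iterators.limit(shared, 2))); // [5]
  }

  static List<Integer> drain(Iterator<Integer> it) {
    List<Integer> out = new ArrayList<>();
    it.forEachRemaining(out::add);
    return out;
  }
}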
Use of org.apache.iceberg.FileScanTask in project Hive by Apache.
The class HiveIcebergSplit, method write.
@Override
public void write(DataOutput out) throws IOException {
  for (FileScanTask fileScanTask : icebergSplit().task().files()) {
    // Reset the residual expression to alwaysTrue() via reflection so it is
    // not serialized along with the split
    if (fileScanTask.residual() != Expressions.alwaysTrue() && fileScanTask.getClass().isAssignableFrom(SPLIT_SCAN_TASK_CLAZZ)) {
      Object residuals = RESIDUALS_FIELD.get(FILE_SCAN_TASK_FIELD.get(fileScanTask));
      if (fileScanTask.spec().isPartitioned()) {
        EXPR_FIELD.set(residuals, Expressions.alwaysTrue());
      } else {
        UNPARTITIONED_EXPR_FIELD.set(residuals, Expressions.alwaysTrue());
      }
    }
  }
  byte[] bytes = SerializationUtil.serializeToBytes(tableLocation);
  out.writeInt(bytes.length);
  out.write(bytes);
  innerSplit.write(out);
}
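The tail of write uses the usual length-prefixed layout for Writable-style serialization: an int length, then the raw bytes. The read side has to consume exactly that layout, readInt to size the buffer, then readFully. A minimal sketch of the matching pair, assuming only java.io (the method names and the round-trip harness are illustrative, not Hive's):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class LengthPrefixedSketch {
  static void writePayload(DataOutput out, byte[] payload) throws IOException {
    out.writeInt(payload.length); // length prefix first...
    out.write(payload);           // ...then the raw bytes
  }

  static byte[] readPayload(DataInput in) throws IOException {
    byte[] payload = new byte[in.readInt()]; // read the prefix to size the buffer
    in.readFully(payload);                   // then consume exactly that many bytes
    return payload;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writePayload(new DataOutputStream(baos), "s3://bucket/table".getBytes(StandardCharsets.UTF_8));
    byte[] back = readPayload(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
    System.out.println(new String(back, StandardCharsets.UTF_8)); // s3://bucket/table
  }
}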
Use of org.apache.iceberg.FileScanTask in project Hive by Apache.
The class HiveIcebergSplit, method getBytesForHash.
@Override
public byte[] getBytesForHash() {
  Collection<FileScanTask> fileScanTasks = innerSplit.task().files();
  try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
    // Each task's file path plus its start offset identifies that scan task,
    // so together they form a stable hash input for the whole split
    for (FileScanTask task : fileScanTasks) {
      baos.write(task.file().path().toString().getBytes());
      baos.write(Longs.toByteArray(task.start()));
    }
    return baos.toByteArray();
  } catch (IOException ioe) {
    throw new RuntimeException("Couldn't produce hash input bytes for HiveIcebergSplit: " + this, ioe);
  }
}
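The returned bytes are only an input; the caller still has to hash them. A sketch of one plausible consumer, digesting the path-plus-offset bytes with SHA-256 from java.security (the file paths and offsets here are hard-coded for illustration, and Hive's actual hashing of these bytes may differ):

import com.google.common.primitives.Longs;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class SplitHashSketch {
  public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
    // Mimic getBytesForHash(): concatenate each task's file path and start offset
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    baos.write("s3://bucket/table/data/00000.parquet".getBytes(StandardCharsets.UTF_8));
    baos.write(Longs.toByteArray(0L));
    baos.write("s3://bucket/table/data/00001.parquet".getBytes(StandardCharsets.UTF_8));
    baos.write(Longs.toByteArray(4096L));

    // Digest the concatenated bytes into a stable split identity
    byte[] digest = MessageDigest.getInstance("SHA-256").digest(baos.toByteArray());
    StringBuilder hex = new StringBuilder();
    for (byte b : digest) {
      hex.append(String.format("%02x", b));
    }
    System.out.println(hex);
  }
}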