
Example 1 with FileScanTask

Use of org.apache.iceberg.FileScanTask in project drill by apache.

From the class IcebergBlockMapBuilder, method getEndpointByteMap.

/**
 * For a given CombinedScanTask, calculate how many bytes are available on each Drillbit endpoint.
 *
 * @param scanTask the CombinedScanTask to calculate endpoint bytes for
 * @return the number of bytes of this task's files that are local to each Drillbit
 */
public EndpointByteMap getEndpointByteMap(CombinedScanTask scanTask) throws IOException {
    EndpointByteMapImpl endpointByteMap = new EndpointByteMapImpl();
    for (FileScanTask work : scanTask.files()) {
        RangeMap<Long, BlockLocation> blockMap = getBlockMap(work);
        long start = work.start();
        long end = start + work.length();
        Range<Long> scanTaskRange = Range.closedOpen(start, end);
        // Find sub-map of ranges that intersect with the scan task
        RangeMap<Long, BlockLocation> subRangeMap = blockMap.subRangeMap(scanTaskRange);
        // Iterate through each block in this sub-map and get the host for the block location
        for (Entry<Range<Long>, BlockLocation> block : subRangeMap.asMapOfRanges().entrySet()) {
            Range<Long> intersection = scanTaskRange.intersection(block.getKey());
            long bytes = intersection.upperEndpoint() - intersection.lowerEndpoint();
            // For each host in the current block location, add the intersecting bytes to the corresponding endpoint
            for (String host : block.getValue().getHosts()) {
                DrillbitEndpoint endpoint = endPointMap.get(host);
                if (endpoint != null) {
                    endpointByteMap.add(endpoint, bytes);
                }
            }
        }
        logger.debug("FileScanTask group ({},{}) max bytes {}", work.file().path(), work.start(), endpointByteMap.getMaxBytes());
    }
    return endpointByteMap;
}
Also used: DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint), EndpointByteMapImpl (org.apache.drill.exec.store.schedule.EndpointByteMapImpl), FileScanTask (org.apache.iceberg.FileScanTask), BlockLocation (org.apache.hadoop.fs.BlockLocation), Range (org.apache.drill.shaded.guava.com.google.common.collect.Range)
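
For context, the CombinedScanTask bundles passed to this method come out of Iceberg's scan planning. Below is a minimal sketch of how a caller might drive getEndpointByteMap, assuming table is an already-loaded org.apache.iceberg.Table and blockMapBuilder is an instance of the IcebergBlockMapBuilder shown above; the assignWork helper is hypothetical, not part of Drill.

import java.io.IOException;
import org.apache.drill.exec.store.schedule.EndpointByteMap;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.io.CloseableIterable;
// import for IcebergBlockMapBuilder omitted; it is the Drill class shown above

// Hypothetical driver: plan the table into task bundles and measure data locality.
void assignWork(Table table, IcebergBlockMapBuilder blockMapBuilder) throws IOException {
    // planTasks() groups file scan tasks into CombinedScanTasks sized for workers
    try (CloseableIterable<CombinedScanTask> tasks = table.newScan().planTasks()) {
        for (CombinedScanTask task : tasks) {
            // Bytes of this bundle that are local to each drillbit endpoint
            EndpointByteMap byteMap = blockMapBuilder.getEndpointByteMap(task);
            // A scheduler would prefer the endpoint holding the most local bytes
        }
    }
}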

Example 2 with FileScanTask

Use of org.apache.iceberg.FileScanTask in project presto by prestodb.

From the class PartitionTable, method getPartitions.

private Map<StructLikeWrapper, Partition> getPartitions(TableScan tableScan) {
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        Map<StructLikeWrapper, Partition> partitions = new HashMap<>();
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            Types.StructType structType = fileScanTask.spec().partitionType();
            StructLike partitionStruct = dataFile.partition();
            // Wrap the partition tuple so it compares by value and can key the map
            StructLikeWrapper partitionWrapper = StructLikeWrapper.forType(structType).set(partitionStruct);
            if (!partitions.containsKey(partitionWrapper)) {
                // First file seen for this partition: seed its statistics from the data file
                Partition partition = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, partitionStruct, dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(dataFile.lowerBounds()), toMap(dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
                partitions.put(partitionWrapper, partition);
                continue;
            }
            // Subsequent files: fold this file's counts and bounds into the partition
            Partition partition = partitions.get(partitionWrapper);
            partition.incrementFileCount();
            partition.incrementRecordCount(dataFile.recordCount());
            partition.incrementSize(dataFile.fileSizeInBytes());
            partition.updateMin(toMap(dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
            partition.updateMax(toMap(dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
            partition.updateNullCount(dataFile.nullValueCounts());
        }
        return partitions;
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used: DataFile (org.apache.iceberg.DataFile), Types (org.apache.iceberg.types.Types), HashMap (java.util.HashMap), StructLikeWrapper (org.apache.iceberg.util.StructLikeWrapper), UncheckedIOException (java.io.UncheckedIOException), StructLike (org.apache.iceberg.StructLike), IOException (java.io.IOException), FileScanTask (org.apache.iceberg.FileScanTask)
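
The key idiom here is StructLikeWrapper, which gives Iceberg partition tuples value-based equals and hashCode so they can key a HashMap. A minimal sketch of the same grouping pattern, assuming table is a loaded org.apache.iceberg.Table; filesPerPartition is a hypothetical helper, not part of Presto.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.util.StructLikeWrapper;

// Hypothetical helper: count data files per partition using the grouping idiom above.
Map<StructLikeWrapper, Long> filesPerPartition(Table table) {
    Map<StructLikeWrapper, Long> counts = new HashMap<>();
    try (CloseableIterable<FileScanTask> tasks = table.newScan().planFiles()) {
        for (FileScanTask task : tasks) {
            DataFile file = task.file();
            // Wrap the partition tuple so equals/hashCode compare by value
            StructLikeWrapper key = StructLikeWrapper
                .forType(task.spec().partitionType())
                .set(file.partition());
            counts.merge(key, 1L, Long::sum);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return counts;
}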

Example 3 with FileScanTask

Use of org.apache.iceberg.FileScanTask in project presto by prestodb.

From the class IcebergSplitSource, method getNextBatch.

@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
    // TODO: move this to a background thread
    List<ConnectorSplit> splits = new ArrayList<>();
    // Take at most maxSize tasks from the shared file scan iterator
    Iterator<FileScanTask> iterator = limit(fileScanIterator, maxSize);
    while (iterator.hasNext()) {
        FileScanTask task = iterator.next();
        splits.add(toIcebergSplit(task));
    }
    // The batch is computed synchronously, so return an already-completed future
    return completedFuture(new ConnectorSplitBatch(splits, isFinished()));
}
Also used: ArrayList (java.util.ArrayList), FileScanTask (org.apache.iceberg.FileScanTask), ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)
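
Callers consume a split source like this one batch by batch until it reports completion. A minimal sketch of such a loop follows; the drain helper is hypothetical, the scan is assumed unpartitioned, and the presto-spi import locations are assumptions that can vary across versions.

import java.util.ArrayList;
import java.util.List;
// presto-spi imports (locations assumed; they can shift between versions)
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitSource;
import static com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

// Hypothetical consumer: drain the source batch by batch, as a scheduler would.
List<ConnectorSplit> drain(ConnectorSplitSource source, int batchSize) {
    List<ConnectorSplit> all = new ArrayList<>();
    while (!source.isFinished()) {
        // join() is acceptable here only because the Iceberg source above
        // returns an already-completed future from getNextBatch
        all.addAll(source.getNextBatch(NOT_PARTITIONED, batchSize).join().getSplits());
    }
    return all;
}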

Example 4 with FileScanTask

Use of org.apache.iceberg.FileScanTask in project hive by apache.

From the class HiveIcebergSplit, method write.

@Override
public void write(DataOutput out) throws IOException {
    for (FileScanTask fileScanTask : icebergSplit().task().files()) {
        // Reset each task's residual filter expression to alwaysTrue() before
        // serializing, reaching into the package-private task fields via reflection
        if (fileScanTask.residual() != Expressions.alwaysTrue() && fileScanTask.getClass().isAssignableFrom(SPLIT_SCAN_TASK_CLAZZ)) {
            Object residuals = RESIDUALS_FIELD.get(FILE_SCAN_TASK_FIELD.get(fileScanTask));
            if (fileScanTask.spec().isPartitioned()) {
                EXPR_FIELD.set(residuals, Expressions.alwaysTrue());
            } else {
                UNPARTITIONED_EXPR_FIELD.set(residuals, Expressions.alwaysTrue());
            }
        }
    }
    byte[] bytes = SerializationUtil.serializeToBytes(tableLocation);
    out.writeInt(bytes.length);
    out.write(bytes);
    innerSplit.write(out);
}
Also used: FileScanTask (org.apache.iceberg.FileScanTask)
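
The reflection above rewrites each task's residual to alwaysTrue() before serialization, presumably to keep the serialized split small and free of large filter trees. The residual contract it relies on is sketched below; inspect is a hypothetical illustration, not Hive code.

import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;

// Hypothetical illustration of the residual contract.
void inspect(FileScanTask task) {
    // The residual is the part of the scan filter not already proven by
    // partition pruning; it must still be applied to rows read from this file.
    Expression residual = task.residual();
    if (residual == Expressions.alwaysTrue()) {
        // alwaysTrue() is a singleton, so identity comparison works,
        // exactly as in the write() method above: nothing left to filter
    }
}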

Example 5 with FileScanTask

Use of org.apache.iceberg.FileScanTask in project hive by apache.

From the class HiveIcebergSplit, method getBytesForHash.

@Override
public byte[] getBytesForHash() {
    Collection<FileScanTask> fileScanTasks = innerSplit.task().files();
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        // Concatenate each file's path and split start offset into a stable byte sequence
        for (FileScanTask task : fileScanTasks) {
            baos.write(task.file().path().toString().getBytes());
            baos.write(Longs.toByteArray(task.start()));
        }
        return baos.toByteArray();
    } catch (IOException ioe) {
        throw new RuntimeException("Couldn't produce hash input bytes for HiveIcebergSplit: " + this, ioe);
    }
}
Also used: ByteArrayOutputStream (java.io.ByteArrayOutputStream), IOException (java.io.IOException), FileScanTask (org.apache.iceberg.FileScanTask)
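
The returned bytes are a content-based identity for the split: the same files and offsets always produce the same sequence. One hypothetical way a consumer might use them, for example to bucket splits consistently; bucketFor is illustrative and not part of the Hive API.

import java.util.Arrays;
// HiveIcebergSplit is the class shown above (import omitted)

// Hypothetical consumer: map a split to a bucket via its content hash.
int bucketFor(HiveIcebergSplit split, int buckets) {
    // Arrays.hashCode is deterministic over the content bytes,
    // so equal splits always land in the same bucket
    return Math.floorMod(Arrays.hashCode(split.getBytesForHash()), buckets);
}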

Aggregations

FileScanTask (org.apache.iceberg.FileScanTask): 7
IOException (java.io.IOException): 4
UncheckedIOException (java.io.UncheckedIOException): 2
Collectors (java.util.stream.Collectors): 2
NullableValue (com.facebook.presto.common.predicate.NullableValue): 1
TupleDomain (com.facebook.presto.common.predicate.TupleDomain): 1
TypeManager (com.facebook.presto.common.type.TypeManager): 1
ExpressionConverter.toIcebergExpression (com.facebook.presto.iceberg.ExpressionConverter.toIcebergExpression): 1
IcebergUtil.getColumns (com.facebook.presto.iceberg.IcebergUtil.getColumns): 1
IcebergUtil.getIdentityPartitions (com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions): 1
Partition.toMap (com.facebook.presto.iceberg.Partition.toMap): 1
TypeConverter.toPrestoType (com.facebook.presto.iceberg.TypeConverter.toPrestoType): 1
ConnectorSplit (com.facebook.presto.spi.ConnectorSplit): 1
Constraint (com.facebook.presto.spi.Constraint): 1
ColumnStatistics (com.facebook.presto.spi.statistics.ColumnStatistics): 1
DoubleRange (com.facebook.presto.spi.statistics.DoubleRange): 1
Estimate (com.facebook.presto.spi.statistics.Estimate): 1
TableStatistics (com.facebook.presto.spi.statistics.TableStatistics): 1
ImmutableList (com.google.common.collect.ImmutableList): 1
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 1