Search in sources :

Example 1 with PrestoSparkMutableRow

use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.

The method getNextPage of the class PrestoSparkShufflePageInput.

/**
 * Builds the next page by concatenating serialized row batches pulled from the shuffle inputs.
 *
 * <p>Inputs are visited in order, starting at {@code currentIteratorIndex}. Every entry begins with
 * a 2-byte row count (read with {@code getShortLittleEndian}); that prefix is skipped and the rest of
 * the entry is appended to the output. Reading stops once the output is larger than
 * {@code TARGET_SIZE} or the collected row count exceeds {@code MAX_ROWS_PER_PAGE}. When an input's
 * iterator has no more elements, its accumulated statistics are added to
 * {@code shuffleStatsCollector} and the next input is started.
 *
 * @param updateMemory not referenced by this method
 * @return the page created from the collected data, or {@code null} when no rows were collected
 * @throws IllegalArgumentException if a row has neither a buffer nor an array
 */
@Override
public Page getNextPage(UpdateMemory updateMemory) {
    int initialCapacity = types.isEmpty() ? 0 : BUFFER_SIZE;
    SliceOutput pageData = new DynamicSliceOutput(initialCapacity);
    int totalRows = 0;
    synchronized (this) {
        while (currentIteratorIndex < shuffleInputs.size()) {
            PrestoSparkShuffleInput shuffleInput = shuffleInputs.get(currentIteratorIndex);
            Iterator<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rows = shuffleInput.getIterator();
            long bytesRead = 0;
            long rowsRead = 0;
            long batchesRead = 0;
            long startMillis = System.currentTimeMillis();
            while (true) {
                // Stop when the input is drained or either page limit has been passed.
                if (!rows.hasNext() || pageData.size() > TARGET_SIZE || totalRows > MAX_ROWS_PER_PAGE) {
                    break;
                }
                batchesRead++;
                PrestoSparkMutableRow row = rows.next()._2;
                ByteBuffer buffer = row.getBuffer();
                short batchRows;
                if (buffer != null) {
                    int available = buffer.remaining();
                    verify(available >= 2, "row data is expected to be at least 2 bytes long");
                    bytesRead += available;
                    batchRows = getShortLittleEndian(buffer);
                    // Skip the 2-byte row count before copying the payload.
                    buffer.position(buffer.position() + 2);
                    pageData.writeBytes(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
                }
                else if (row.getArray() != null) {
                    byte[] bytes = row.getArray();
                    int offset = row.getOffset();
                    int length = row.getLength();
                    verify(length >= 2, "row data is expected to be at least 2 bytes long");
                    bytesRead += length;
                    batchRows = getShortLittleEndian(bytes, offset);
                    // Skip the 2-byte row count before copying the payload.
                    pageData.writeBytes(bytes, offset + 2, length - 2);
                }
                else {
                    throw new IllegalArgumentException("Unexpected PrestoSparkMutableRow: 'buffer' and 'array' fields are both null");
                }
                totalRows += batchRows;
                rowsRead += batchRows;
            }
            long elapsedMillis = System.currentTimeMillis() - startMillis;
            shuffleStats.accumulate(rowsRead, batchesRead, bytesRead, elapsedMillis);
            if (rows.hasNext()) {
                // The page limits were reached first; currentIteratorIndex is left unchanged.
                break;
            }
            // This input is exhausted: publish its statistics and move on to the next one.
            shuffleStatsCollector.add(new PrestoSparkShuffleStats(
                    shuffleInput.getFragmentId(),
                    taskId,
                    READ,
                    shuffleStats.getProcessedRows(),
                    shuffleStats.getProcessedRowBatches(),
                    shuffleStats.getProcessedBytes(),
                    shuffleStats.getElapsedWallTimeMills()));
            shuffleStats.reset();
            currentIteratorIndex++;
        }
    }
    return totalRows == 0 ? null : createPage(totalRows, pageData.slice().getInput(), types);
}
Also used : SliceOutput(io.airlift.slice.SliceOutput) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) PrestoSparkShuffleStats(com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats) Tuple2(scala.Tuple2) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) ByteBuffer(java.nio.ByteBuffer) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow)

Example 2 with PrestoSparkMutableRow

use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.

The method createRdd of the class PrestoSparkRddFactory.

/**
 * Creates the pair RDD that runs the given plan fragment.
 *
 * <p>The fragment and session are packed into a serialized task descriptor, the shuffle input RDDs
 * are keyed by the string form of their fragment id, and all shuffle inputs are required to have the
 * same number of partitions. A task source RDD is created from split sources when the fragment
 * contains table scans, as a single empty partition when there are neither table scans nor RDD
 * inputs, and is absent otherwise.
 *
 * @param <T> the task output type carried by the resulting RDD
 * @return the RDD built by {@code PrestoSparkTaskRdd.create}, wrapped as a {@code JavaPairRDD}
 */
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(JavaSparkContext sparkContext, Session session, PlanFragment fragment, PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider, CollectionAccumulator<SerializedTaskInfo> taskInfoCollector, CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector, TableWriteInfo tableWriteInfo, Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs, Map<PlanFragmentId, Broadcast<?>> broadcastInputs, Class<T> outputType) {
    checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);

    PrestoSparkTaskDescriptor descriptor = new PrestoSparkTaskDescriptor(
            session.toSessionRepresentation(),
            session.getIdentity().getExtraCredentials(),
            fragment,
            tableWriteInfo);
    SerializedPrestoSparkTaskDescriptor serializedDescriptor = new SerializedPrestoSparkTaskDescriptor(taskDescriptorJsonCodec.toJsonBytes(descriptor));

    // Register every shuffle input and make sure they all agree on the partition count.
    Optional<Integer> shufflePartitionCount = Optional.empty();
    Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleRddsByFragment = new HashMap<>();
    for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> entry : rddInputs.entrySet()) {
        RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> shuffleRdd = entry.getValue().rdd();
        shuffleRddsByFragment.put(entry.getKey().toString(), shuffleRdd);
        if (shufflePartitionCount.isPresent()) {
            checkArgument(
                    shufflePartitionCount.get() == shuffleRdd.getNumPartitions(),
                    "Incompatible number of input partitions: %s != %s",
                    shufflePartitionCount.get(),
                    shuffleRdd.getNumPartitions());
        }
        else {
            shufflePartitionCount = Optional.of(shuffleRdd.getNumPartitions());
        }
    }

    PrestoSparkTaskProcessor<T> processor = new PrestoSparkTaskProcessor<>(
            executorFactoryProvider,
            serializedDescriptor,
            taskInfoCollector,
            shuffleStatsCollector,
            toTaskProcessorBroadcastInputs(broadcastInputs),
            outputType);

    Optional<PrestoSparkTaskSourceRdd> taskSources;
    List<TableScanNode> scanNodes = findTableScanNodes(fragment.getRoot());
    if (!scanNodes.isEmpty()) {
        try (CloseableSplitSourceProvider provider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
            SplitSourceFactory factory = new SplitSourceFactory(provider, WarningCollector.NOOP);
            Map<PlanNodeId, SplitSource> sourcesByNode = factory.createSplitSources(fragment, session, tableWriteInfo);
            taskSources = Optional.of(createTaskSourcesRdd(fragment.getId(), sparkContext, session, fragment.getPartitioning(), scanNodes, sourcesByNode, shufflePartitionCount));
        }
    }
    else if (!rddInputs.isEmpty()) {
        // Shuffle inputs alone drive the task; no task source RDD is needed.
        taskSources = Optional.empty();
    }
    else {
        checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION), "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
        // In case of no inputs we still need to schedule a task.
        // Task with no inputs may produce results (e.g.: ValuesNode).
        // To force the task to be scheduled we create a PrestoSparkTaskSourceRdd that contains exactly one partition.
        // Since there's also no table scans in the fragment, the list of TaskSource's for this partition is empty.
        taskSources = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
    }

    return JavaPairRDD.fromRDD(
            PrestoSparkTaskRdd.create(sparkContext.sc(), taskSources, shuffleRddsByFragment, processor),
            classTag(MutablePartitionId.class),
            classTag(outputType));
}
Also used : SerializedPrestoSparkTaskDescriptor(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor) PrestoSparkTaskDescriptor(com.facebook.presto.spark.PrestoSparkTaskDescriptor) HashMap(java.util.HashMap) SplitSourceFactory(com.facebook.presto.sql.planner.SplitSourceFactory) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) MutablePartitionId(com.facebook.presto.spark.classloader_interface.MutablePartitionId) RDD(org.apache.spark.rdd.RDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow) PrestoSparkTaskSourceRdd(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd) PrestoSparkTaskProcessor(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskProcessor) SerializedPrestoSparkTaskDescriptor(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor) CloseableSplitSourceProvider(com.facebook.presto.split.CloseableSplitSourceProvider) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) Tuple2(scala.Tuple2) SplitSource(com.facebook.presto.split.SplitSource) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap)

Example 3 with PrestoSparkMutableRow

use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.

The method getEntries of the class TestPrestoSparkRowBatch.

/**
 * Decodes every tuple produced by the row batch's tuple supplier into a list of rows.
 *
 * <p>Each tuple's buffer is read in little-endian order: a leading {@code short} gives the number of
 * rows in the entry (asserted to equal the row's position count), followed by that many rows read
 * with {@code readRowData}. Iteration ends when the supplier returns {@code null}.
 *
 * @param rowBatch the batch whose entries are decoded
 * @return one list of rows per tuple, in the order the supplier returned them
 */
private static List<List<Row>> getEntries(PrestoSparkRowBatch rowBatch) {
    ImmutableList.Builder<List<Row>> allEntries = ImmutableList.builder();
    RowTupleSupplier supplier = rowBatch.createRowTupleSupplier();
    for (Tuple2<MutablePartitionId, PrestoSparkMutableRow> tuple = supplier.getNext(); tuple != null; tuple = supplier.getNext()) {
        int partition = tuple._1.getPartition();
        PrestoSparkMutableRow mutableRow = tuple._2;
        ByteBuffer data = mutableRow.getBuffer();
        data.order(LITTLE_ENDIAN);
        // The entry starts with the number of rows it contains.
        short expectedRows = data.getShort();
        assertEquals(mutableRow.getPositionCount(), expectedRows);
        ImmutableList.Builder<Row> rows = ImmutableList.builder();
        for (int remaining = expectedRows; remaining > 0; remaining--) {
            rows.add(new Row(partition, readRowData(data)));
        }
        allEntries.add(rows.build());
    }
    return allEntries.build();
}
Also used : MutablePartitionId(com.facebook.presto.spark.classloader_interface.MutablePartitionId) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) RowTupleSupplier(com.facebook.presto.spark.execution.PrestoSparkRowBatch.RowTupleSupplier) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow) ByteBuffer(java.nio.ByteBuffer) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow)

Aggregations

PrestoSparkMutableRow (com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow): 3; MutablePartitionId (com.facebook.presto.spark.classloader_interface.MutablePartitionId): 2; ByteBuffer (java.nio.ByteBuffer): 2; Tuple2 (scala.Tuple2): 2; PrestoSparkTaskDescriptor (com.facebook.presto.spark.PrestoSparkTaskDescriptor): 1; PrestoSparkShuffleStats (com.facebook.presto.spark.classloader_interface.PrestoSparkShuffleStats): 1; PrestoSparkTaskProcessor (com.facebook.presto.spark.classloader_interface.PrestoSparkTaskProcessor): 1; PrestoSparkTaskSourceRdd (com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd): 1; SerializedPrestoSparkTaskDescriptor (com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor): 1; RowTupleSupplier (com.facebook.presto.spark.execution.PrestoSparkRowBatch.RowTupleSupplier): 1; PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId): 1; TableScanNode (com.facebook.presto.spi.plan.TableScanNode): 1; CloseableSplitSourceProvider (com.facebook.presto.split.CloseableSplitSourceProvider): 1; SplitSource (com.facebook.presto.split.SplitSource): 1; SplitSourceFactory (com.facebook.presto.sql.planner.SplitSourceFactory): 1; PlanFragmentId (com.facebook.presto.sql.planner.plan.PlanFragmentId): 1; ImmutableList (com.google.common.collect.ImmutableList): 1; ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 1; ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 1; DynamicSliceOutput (io.airlift.slice.DynamicSliceOutput): 1