Use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.
Class PrestoSparkShufflePageInput, method getNextPage:
@Override
public Page getNextPage(UpdateMemory updateMemory)
{
    SliceOutput output = new DynamicSliceOutput(types.isEmpty() ? 0 : BUFFER_SIZE);
    int rowCount = 0;
    synchronized (this) {
        while (currentIteratorIndex < shuffleInputs.size()) {
            PrestoSparkShuffleInput input = shuffleInputs.get(currentIteratorIndex);
            Iterator<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> iterator = input.getIterator();
            long currentIteratorProcessedBytes = 0;
            long currentIteratorProcessedRows = 0;
            long currentIteratorProcessedRowBatches = 0;
            long start = System.currentTimeMillis();
            while (iterator.hasNext() && output.size() <= TARGET_SIZE && rowCount <= MAX_ROWS_PER_PAGE) {
                currentIteratorProcessedRowBatches++;
                PrestoSparkMutableRow row = iterator.next()._2;
                if (row.getBuffer() != null) {
                    // ByteBuffer representation: the first two bytes hold the row count
                    ByteBuffer buffer = row.getBuffer();
                    verify(buffer.remaining() >= 2, "row data is expected to be at least 2 bytes long");
                    currentIteratorProcessedBytes += buffer.remaining();
                    short entryRowCount = getShortLittleEndian(buffer);
                    rowCount += entryRowCount;
                    currentIteratorProcessedRows += entryRowCount;
                    // skip the two-byte row count header and copy only the row data
                    buffer.position(buffer.position() + 2);
                    output.writeBytes(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
                }
                else if (row.getArray() != null) {
                    // raw byte array representation with the same two-byte header
                    verify(row.getLength() >= 2, "row data is expected to be at least 2 bytes long");
                    currentIteratorProcessedBytes += row.getLength();
                    short entryRowCount = getShortLittleEndian(row.getArray(), row.getOffset());
                    rowCount += entryRowCount;
                    currentIteratorProcessedRows += entryRowCount;
                    output.writeBytes(row.getArray(), row.getOffset() + 2, row.getLength() - 2);
                }
                else {
                    throw new IllegalArgumentException("Unexpected PrestoSparkMutableRow: 'buffer' and 'array' fields are both null");
                }
            }
            long end = System.currentTimeMillis();
            shuffleStats.accumulate(currentIteratorProcessedRows, currentIteratorProcessedRowBatches, currentIteratorProcessedBytes, end - start);
            if (!iterator.hasNext()) {
                // the current input is exhausted: flush its stats and move on to the next one
                shuffleStatsCollector.add(new PrestoSparkShuffleStats(input.getFragmentId(), taskId, READ, shuffleStats.getProcessedRows(), shuffleStats.getProcessedRowBatches(), shuffleStats.getProcessedBytes(), shuffleStats.getElapsedWallTimeMills()));
                shuffleStats.reset();
                currentIteratorIndex++;
            }
            else {
                break;
            }
        }
    }
    if (rowCount == 0) {
        return null;
    }
    return createPage(rowCount, output.slice().getInput(), types);
}
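Both branches above rely on the same wire layout: each PrestoSparkMutableRow entry starts with a two-byte little-endian row count, followed by the serialized row data. A minimal sketch of decoding that header, assuming a plain java.nio.ByteBuffer (the class and helper names here are illustrative, not the actual presto utilities):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

// Illustrative sketch: decodes the two-byte little-endian row-count header
// that precedes the serialized row data in a row batch.
public final class RowBatchHeaderExample
{
    // reads the row count without moving the buffer's position
    static short readRowCount(ByteBuffer buffer)
    {
        return buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN).getShort();
    }

    public static void main(String[] args)
    {
        ByteBuffer batch = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        batch.putShort((short) 3); // header: this batch claims 3 rows
        batch.flip();
        System.out.println(readRowCount(batch)); // prints 3
    }
}

Reading through duplicate() leaves the original position untouched, mirroring how getNextPage reads the header first and only then advances the position past the two header bytes.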
Use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.
Class PrestoSparkRddFactory, method createRdd:
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(
        JavaSparkContext sparkContext,
        Session session,
        PlanFragment fragment,
        PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider,
        CollectionAccumulator<SerializedTaskInfo> taskInfoCollector,
        CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector,
        TableWriteInfo tableWriteInfo,
        Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs,
        Map<PlanFragmentId, Broadcast<?>> broadcastInputs,
        Class<T> outputType)
{
    checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);
    PrestoSparkTaskDescriptor taskDescriptor = new PrestoSparkTaskDescriptor(session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, tableWriteInfo);
    SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor = new SerializedPrestoSparkTaskDescriptor(taskDescriptorJsonCodec.toJsonBytes(taskDescriptor));
    Optional<Integer> numberOfShufflePartitions = Optional.empty();
    Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleInputRddMap = new HashMap<>();
    for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> input : rddInputs.entrySet()) {
        RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rdd = input.getValue().rdd();
        shuffleInputRddMap.put(input.getKey().toString(), rdd);
        // all shuffle inputs must agree on the number of partitions
        if (!numberOfShufflePartitions.isPresent()) {
            numberOfShufflePartitions = Optional.of(rdd.getNumPartitions());
        }
        else {
            checkArgument(numberOfShufflePartitions.get() == rdd.getNumPartitions(), "Incompatible number of input partitions: %s != %s", numberOfShufflePartitions.get(), rdd.getNumPartitions());
        }
    }
    PrestoSparkTaskProcessor<T> taskProcessor = new PrestoSparkTaskProcessor<>(executorFactoryProvider, serializedTaskDescriptor, taskInfoCollector, shuffleStatsCollector, toTaskProcessorBroadcastInputs(broadcastInputs), outputType);
    Optional<PrestoSparkTaskSourceRdd> taskSourceRdd;
    List<TableScanNode> tableScans = findTableScanNodes(fragment.getRoot());
    if (!tableScans.isEmpty()) {
        try (CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
            SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, WarningCollector.NOOP);
            Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(fragment, session, tableWriteInfo);
            taskSourceRdd = Optional.of(createTaskSourcesRdd(fragment.getId(), sparkContext, session, fragment.getPartitioning(), tableScans, splitSources, numberOfShufflePartitions));
        }
    }
    else if (rddInputs.isEmpty()) {
        checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION), "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
        // In case of no inputs we still need to schedule a task.
        // A task with no inputs may still produce results (e.g., ValuesNode).
        // To force the task to be scheduled we create a PrestoSparkTaskSourceRdd that contains exactly one partition.
        // Since there are also no table scans in the fragment, the list of TaskSources for this partition is empty.
        taskSourceRdd = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
    }
    else {
        taskSourceRdd = Optional.empty();
    }
    return JavaPairRDD.fromRDD(PrestoSparkTaskRdd.create(sparkContext.sc(), taskSourceRdd, shuffleInputRddMap, taskProcessor), classTag(MutablePartitionId.class), classTag(outputType));
}
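The classTag calls in the return statement bridge Java Class objects to the scala.reflect.ClassTag instances that JavaPairRDD.fromRDD requires. A minimal sketch of such a helper, assuming the standard Scala reflection API (the actual presto helper may be defined elsewhere and differ in detail):

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

// Illustrative helper: wraps a Java Class into the Scala ClassTag
// that Spark's Java/Scala interop methods expect.
final class ClassTags
{
    private ClassTags() {}

    static <T> ClassTag<T> classTag(Class<T> clazz)
    {
        return ClassTag$.MODULE$.apply(clazz);
    }
}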
Use of com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow in project presto by prestodb.
Class TestPrestoSparkRowBatch, method getEntries:
private static List<List<Row>> getEntries(PrestoSparkRowBatch rowBatch)
{
    ImmutableList.Builder<List<Row>> entries = ImmutableList.builder();
    RowTupleSupplier rowTupleSupplier = rowBatch.createRowTupleSupplier();
    while (true) {
        Tuple2<MutablePartitionId, PrestoSparkMutableRow> next = rowTupleSupplier.getNext();
        if (next == null) {
            break;
        }
        ImmutableList.Builder<Row> entry = ImmutableList.builder();
        int partition = next._1.getPartition();
        PrestoSparkMutableRow mutableRow = next._2;
        ByteBuffer buffer = mutableRow.getBuffer();
        buffer.order(LITTLE_ENDIAN);
        // the two-byte header must match the row count reported by the mutable row
        short rowCount = buffer.getShort();
        assertEquals(mutableRow.getPositionCount(), rowCount);
        for (int i = 0; i < rowCount; i++) {
            entry.add(new Row(partition, readRowData(buffer)));
        }
        entries.add(entry.build());
    }
    return entries.build();
}
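The readRowData helper is not shown in this snippet. A plausible sketch, assuming each row inside the batch is encoded as an int length followed by that many bytes (an assumption about PrestoSparkRowBatch's layout, not a confirmed detail; the buffer is already in little-endian mode from the caller):

// Hypothetical sketch of readRowData, assuming a length-prefixed row encoding.
private static byte[] readRowData(ByteBuffer buffer)
{
    int length = buffer.getInt(); // assumed 4-byte length prefix per row
    byte[] data = new byte[length];
    buffer.get(data);
    return data;
}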