Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
Class WritersBenchmark, method writePartitionedLegacyFanoutDataWriter.
@Benchmark
@Threads(1)
public void writePartitionedLegacyFanoutDataWriter(Blackhole blackhole) throws IOException {
  FileIO io = table().io();
  OutputFileFactory fileFactory = newFileFactory();
  Schema writeSchema = table().schema();
  StructType sparkWriteType = SparkSchemaUtil.convert(writeSchema);

  SparkAppenderFactory appenders =
      SparkAppenderFactory.builderFor(table(), writeSchema, sparkWriteType)
          .spec(partitionedSpec)
          .build();

  TaskWriter<InternalRow> writer =
      new SparkPartitionedFanoutWriter(
          partitionedSpec, fileFormat(), appenders, fileFactory, io,
          TARGET_FILE_SIZE_IN_BYTES, writeSchema, sparkWriteType);

  try (TaskWriter<InternalRow> closableWriter = writer) {
    for (InternalRow row : rows) {
      closableWriter.write(row);
    }
  }

  blackhole.consume(writer.complete());
}
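The rows field that the loop iterates over is populated elsewhere in WritersBenchmark. As a rough sketch of how such input could be produced (the two-column (long id, string data) schema and the generateRows helper below are assumptions, not the benchmark's actual setup), Spark's GenericInternalRow can build InternalRow instances directly:

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.unsafe.types.UTF8String;

// Hypothetical helper: builds rows for an assumed (long id, string data) schema.
private static List<InternalRow> generateRows(int numRows) {
  List<InternalRow> rows = new ArrayList<>(numRows);
  for (int i = 0; i < numRows; i++) {
    // Values must already be in Spark's internal representation, e.g. UTF8String for strings.
    rows.add(new GenericInternalRow(new Object[] {(long) i, UTF8String.fromString("row-" + i)}));
  }
  return rows;
}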
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
Class WritersBenchmark, method writePartitionedFanoutDataWriter.
@Benchmark
@Threads(1)
public void writePartitionedFanoutDataWriter(Blackhole blackhole) throws IOException {
  FileIO io = table().io();
  OutputFileFactory fileFactory = newFileFactory();

  SparkFileWriterFactory writerFactory =
      SparkFileWriterFactory.builderFor(table())
          .dataFileFormat(fileFormat())
          .dataSchema(table().schema())
          .build();

  FanoutDataWriter<InternalRow> writer =
      new FanoutDataWriter<>(
          writerFactory, fileFactory, io, fileFormat(), TARGET_FILE_SIZE_IN_BYTES);

  PartitionKey partitionKey = new PartitionKey(partitionedSpec, table().schema());
  StructType dataSparkType = SparkSchemaUtil.convert(table().schema());
  InternalRowWrapper internalRowWrapper = new InternalRowWrapper(dataSparkType);

  try (FanoutDataWriter<InternalRow> closeableWriter = writer) {
    for (InternalRow row : rows) {
      partitionKey.partition(internalRowWrapper.wrap(row));
      closeableWriter.write(row, partitionedSpec, partitionKey);
    }
  }

  blackhole.consume(writer);
}
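Unlike the legacy SparkPartitionedFanoutWriter, FanoutDataWriter leaves partition-key derivation to the caller: InternalRowWrapper adapts each Spark row to Iceberg's StructLike view so PartitionKey can evaluate the partition transforms before every write. In a real write task (rather than a benchmark) the files produced by the closed writer would then be collected and committed; a minimal sketch, assuming Iceberg's DataWriteResult, DataFile, and AppendFiles APIs:

// Sketch only: after close(), the fanout writer exposes the data files it produced.
DataWriteResult result = writer.result();   // org.apache.iceberg.io.DataWriteResult
AppendFiles append = table().newAppend();   // org.apache.iceberg.AppendFiles
for (DataFile dataFile : result.dataFiles()) {
  append.appendFile(dataFile);
}
append.commit();                            // makes the new files visible in a snapshot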
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
Class RewriteManifestsProcedure, method toOutputRows.
private InternalRow[] toOutputRows(RewriteManifests.Result result) {
  int rewrittenManifestsCount = Iterables.size(result.rewrittenManifests());
  int addedManifestsCount = Iterables.size(result.addedManifests());
  InternalRow row = newInternalRow(rewrittenManifestsCount, addedManifestsCount);
  return new InternalRow[] {row};
}
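newInternalRow is a small helper inherited from Iceberg's BaseProcedure; conceptually it just wraps the given values in a Spark GenericInternalRow. A sketch of its behavior, not the verbatim implementation:

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;

// Box the procedure's output column values into a single internal row.
protected InternalRow newInternalRow(Object... values) {
  return new GenericInternalRow(values);
}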
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
Class RollbackToSnapshotProcedure, method call.
@Override
public InternalRow[] call(InternalRow args) {
  Identifier tableIdent = toIdentifier(args.getString(0), PARAMETERS[0].name());
  long snapshotId = args.getLong(1);

  return modifyIcebergTable(
      tableIdent,
      table -> {
        Snapshot previousSnapshot = table.currentSnapshot();
        table.manageSnapshots().rollbackTo(snapshotId).commit();
        InternalRow outputRow = newInternalRow(previousSnapshot.snapshotId(), snapshotId);
        return new InternalRow[] {outputRow};
      });
}
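From a user's perspective, this procedure is reached through Spark SQL once Iceberg's SQL extensions are enabled on the session. A usage sketch (catalog, table, and snapshot id below are placeholders):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession.active();
// Roll the table back to a known snapshot; the result is one row holding the
// previous snapshot id and the now-current snapshot id.
Dataset<Row> result =
    spark.sql("CALL my_catalog.system.rollback_to_snapshot('db.sample', 12345)");
result.show();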
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
Class AddFilesProcedure, method call.
@Override
public InternalRow[] call(InternalRow args) {
  Identifier tableIdent = toIdentifier(args.getString(0), PARAMETERS[0].name());

  CatalogPlugin sessionCat = spark().sessionState().catalogManager().v2SessionCatalog();
  Identifier sourceIdent =
      toCatalogAndIdentifier(args.getString(1), PARAMETERS[1].name(), sessionCat).identifier();

  Map<String, String> partitionFilter = Maps.newHashMap();
  if (!args.isNullAt(2)) {
    args.getMap(2)
        .foreach(
            DataTypes.StringType,
            DataTypes.StringType,
            (k, v) -> {
              partitionFilter.put(k.toString(), v.toString());
              return BoxedUnit.UNIT;
            });
  }

  boolean checkDuplicateFiles;
  if (args.isNullAt(3)) {
    checkDuplicateFiles = true;
  } else {
    checkDuplicateFiles = args.getBoolean(3);
  }

  long addedFilesCount =
      importToIceberg(tableIdent, sourceIdent, partitionFilter, checkDuplicateFiles);

  return new InternalRow[] {newInternalRow(addedFilesCount)};
}
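The positional arguments map onto the procedure's PARAMETERS: the target Iceberg table, the source table, an optional partition filter map, and an optional duplicate-file check flag. A usage sketch from Spark SQL (catalog and table names are placeholders; parameter names follow the documented add_files procedure):

import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession.active();
// Import only the files of one source partition into the Iceberg table.
spark.sql(
    "CALL my_catalog.system.add_files("
        + "table => 'db.iceberg_table', "
        + "source_table => 'db.hive_table', "
        + "partition_filter => map('region', 'us'))");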