Use of org.apache.spark.sql.catalyst.expressions.GenericInternalRow in the Apache Iceberg project.
Class TestSparkPartitioningWriters, method toRow.
/**
 * Builds a two-field internal row for the writer tests.
 *
 * @param id value stored at ordinal 0
 * @param data string stored at ordinal 1 after conversion via {@code UTF8String.fromString}
 * @return a {@code GenericInternalRow} holding both values
 */
@Override
protected InternalRow toRow(Integer id, String data) {
  // Populate the backing array up front instead of updating ordinals one at a time.
  Object[] values = new Object[] {id, UTF8String.fromString(data)};
  return new GenericInternalRow(values);
}
Use of org.apache.spark.sql.catalyst.expressions.GenericInternalRow in the Apache Iceberg project.
Class TestSparkPositionDeltaWriters, method toRow.
/**
 * Converts an (id, data) pair into the internal row form used by these tests.
 *
 * @param id value placed in the first field
 * @param data text placed in the second field as a {@code UTF8String}
 * @return a two-field {@code GenericInternalRow}
 */
@Override
protected InternalRow toRow(Integer id, String data) {
  Object[] fields = new Object[2];
  fields[0] = id;
  fields[1] = UTF8String.fromString(data);
  return new GenericInternalRow(fields);
}
Use of org.apache.spark.sql.catalyst.expressions.GenericInternalRow in the Apache Iceberg project.
Class TestSparkRollingFileWriters, method toRow.
/**
 * Wraps the given id and data into a two-field internal row.
 *
 * @param id value stored at ordinal 0
 * @param data string stored at ordinal 1, converted with {@code UTF8String.fromString}
 * @return the populated {@code GenericInternalRow}
 */
@Override
protected InternalRow toRow(Integer id, String data) {
  return new GenericInternalRow(new Object[] {id, UTF8String.fromString(data)});
}
Use of org.apache.spark.sql.catalyst.expressions.GenericInternalRow in the Apache Iceberg project.
Class TestPartitionPruning, method createTestDataset.
/**
 * Builds the test DataFrame from the {@code LOGS} entries.
 *
 * <p>Each entry becomes a five-field internal row (id, date, level, message, timestamp in
 * microseconds). The rows are parallelized, turned into a DataFrame using the Spark schema
 * converted from {@code LOG_SCHEMA}, and then projected to add the {@code bucket_id},
 * {@code truncated_message} and {@code ts_hour} columns.
 *
 * @return the projected dataset including the three derived columns
 */
private Dataset<Row> createTestDataset() {
  List<InternalRow> internalRows =
      LOGS.stream()
          .map(
              entry -> {
                Object[] fields = new Object[5];
                fields[0] = entry.getId();
                fields[1] = UTF8String.fromString(entry.getDate());
                fields[2] = UTF8String.fromString(entry.getLevel());
                fields[3] = UTF8String.fromString(entry.getMessage());
                // discard the nanoseconds part to simplify
                fields[4] = TimeUnit.MILLISECONDS.toMicros(entry.getTimestamp().toEpochMilli());
                return new GenericInternalRow(fields);
              })
          .collect(Collectors.toList());

  JavaRDD<InternalRow> rowRdd = sparkContext.parallelize(internalRows);
  Dataset<Row> rawFrame =
      spark.internalCreateDataFrame(
          JavaRDD.toRDD(rowRdd), SparkSchemaUtil.convert(LOG_SCHEMA), false);

  // First projection selects the base columns; the second re-selects them and appends the
  // derived columns.
  Dataset<Row> baseColumns = rawFrame.selectExpr("id", "date", "level", "message", "timestamp");
  return baseColumns.selectExpr(
      "id",
      "date",
      "level",
      "message",
      "timestamp",
      "bucket3(id) AS bucket_id",
      "truncate5(message) AS truncated_message",
      "hour(timestamp) AS ts_hour");
}
Use of org.apache.spark.sql.catalyst.expressions.GenericInternalRow in the Apache Iceberg project.
Class TestSparkFileWriterFactory, method toRow.
/**
 * Creates the internal row representation of an (id, data) record.
 *
 * @param id value for the first field
 * @param data text for the second field, stored as a {@code UTF8String}
 * @return a {@code GenericInternalRow} with the two fields set
 */
@Override
protected InternalRow toRow(Integer id, String data) {
  UTF8String encodedData = UTF8String.fromString(data);
  Object[] columns = {id, encodedData};
  return new GenericInternalRow(columns);
}
Aggregations