Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
The class TestDataFrameWrites, method createDataset:
private Dataset<Row> createDataset(Iterable<Record> records, Schema schema) throws IOException {
  // this uses the SparkAvroReader to create a DataFrame from the list of records
  // it assumes that SparkAvroReader is correct
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile))
      .schema(schema)
      .named("test")
      .build()) {
    for (Record rec : records) {
      writer.add(rec);
    }
  }

  // make sure the dataframe matches the records before moving on
  List<InternalRow> rows = Lists.newArrayList();
  try (AvroIterable<InternalRow> reader = Avro.read(Files.localInput(testFile))
      .createReaderFunc(SparkAvroReader::new)
      .project(schema)
      .build()) {
    Iterator<Record> recordIter = records.iterator();
    Iterator<InternalRow> readIter = reader.iterator();
    while (recordIter.hasNext() && readIter.hasNext()) {
      InternalRow row = readIter.next();
      assertEqualsUnsafe(schema.asStruct(), recordIter.next(), row);
      rows.add(row);
    }
    Assert.assertEquals("Both iterators should be exhausted", recordIter.hasNext(), readIter.hasNext());
  }

  JavaRDD<InternalRow> rdd = sc.parallelize(rows);
  return spark.internalCreateDataFrame(JavaRDD.toRDD(rdd), convert(schema), false);
}
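For context, a test built on this helper would typically round-trip the records through an Iceberg table and compare the results. The sketch below is an assumption about such a usage, not part of the snippet above; the record generator, the location variable, and the read-back step are all illustrative.

// Hedged usage sketch: 'location' and the generated records are illustrative assumptions.
Iterable<Record> expected = RandomData.generate(schema, 100, 0L);
Dataset<Row> df = createDataset(expected, schema);
df.write().format("iceberg").mode("append").save(location);

// Read the table back through Spark and compare it against the same expected records.
Dataset<Row> result = spark.read().format("iceberg").load(location);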
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
The class TestHelpers, method assertEqualsBatch:
public static void assertEqualsBatch(Types.StructType struct, Iterator<Record> expected, ColumnarBatch batch,
                                     boolean checkArrowValidityVector) {
  for (int rowId = 0; rowId < batch.numRows(); rowId++) {
    List<Types.NestedField> fields = struct.fields();
    InternalRow row = batch.getRow(rowId);
    Record rec = expected.next();
    for (int i = 0; i < fields.size(); i += 1) {
      Type fieldType = fields.get(i).type();
      Object expectedValue = rec.get(i);
      Object actualValue = row.isNullAt(i) ? null : row.get(i, convert(fieldType));
      assertEqualsUnsafe(fieldType, expectedValue, actualValue);

      if (checkArrowValidityVector) {
        ColumnVector columnVector = batch.column(i);
        ValueVector arrowVector = ((IcebergArrowColumnVector) columnVector).vectorAccessor().getVector();
        Assert.assertFalse("Nullability doesn't match of " + columnVector.dataType(),
            expectedValue == null ^ arrowVector.isNull(rowId));
      }
    }
  }
}
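For context, assertEqualsBatch is the batch-level counterpart of assertEqualsUnsafe and is driven by a vectorized reader that yields ColumnarBatch instances. The wiring below is a hedged sketch of such a caller; the Parquet read builder and the VectorizedSparkParquetReaders.buildReader arguments are assumptions and vary across Iceberg versions.

// Hedged sketch: file, schema, and reader wiring are illustrative; buildReader's
// arguments depend on the Iceberg version in use.
try (CloseableIterable<ColumnarBatch> batches = Parquet.read(Files.localInput(testFile))
    .project(schema)
    .createBatchedReaderFunc(fileSchema ->
        VectorizedSparkParquetReaders.buildReader(schema, fileSchema, true))
    .build()) {
  Iterator<Record> expectedIter = expectedRecords.iterator();
  for (ColumnarBatch batch : batches) {
    assertEqualsBatch(schema.asStruct(), expectedIter, batch, true);
  }
}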
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
The class TestOrcWrite, method splitOffsets:
@Test
public void splitOffsets() throws IOException {
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  Iterable<InternalRow> rows = RandomData.generateSpark(SCHEMA, 1, 0L);
  FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile))
      .createWriterFunc(SparkOrcWriter::new)
      .schema(SCHEMA)
      .build();

  writer.addAll(rows);
  writer.close();
  Assert.assertNotNull("Split offsets not present", writer.splitOffsets());
}
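The offsets reported by the appender are what Iceberg records in data file metadata so that scan planning can split ORC files on stripe boundaries. A minimal sketch of carrying them into a DataFile follows; the builder calls are illustrative and not part of the test.

// Hedged sketch: attaching the writer's split offsets to data file metadata (illustrative).
DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned())
    .withInputFile(Files.localInput(testFile))
    .withFormat(FileFormat.ORC)
    .withMetrics(writer.metrics())
    .withSplitOffsets(writer.splitOffsets())
    .build();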
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
The class TestSparkPartitioningWriters, method toRow:
@Override
protected InternalRow toRow(Integer id, String data) {
  InternalRow row = new GenericInternalRow(2);
  row.update(0, id);
  row.update(1, UTF8String.fromString(data));
  return row;
}
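For context, this override supplies (id, data) rows to the shared partitioning-writer tests. A hedged sketch of exercising it directly follows; the concrete values are illustrative.

// Hedged usage sketch: the values are illustrative.
InternalRow row = toRow(1, "aaa");
Assert.assertEquals(1, row.getInt(0));
Assert.assertEquals("aaa", row.getString(1));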
Use of org.apache.spark.sql.catalyst.InternalRow in project iceberg by apache.
The class TestSparkPositionDeltaWriters, method toRow:
@Override
protected InternalRow toRow(Integer id, String data) {
  InternalRow row = new GenericInternalRow(2);
  row.update(0, id);
  row.update(1, UTF8String.fromString(data));
  return row;
}
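The same two-column row can also be built by handing the values array straight to the GenericInternalRow constructor; the line below is an illustrative alternative, not code from either test.

// Illustrative alternative: GenericInternalRow also accepts the backing values array directly.
InternalRow row = new GenericInternalRow(new Object[] {id, UTF8String.fromString(data)});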