Search in sources :

Example 96 with InternalRow

use of org.apache.spark.sql.catalyst.InternalRow in project hudi by apache.

the class TestHoodieInternalRow method testIsNullCheck.

/**
 * Exercises {@code setNullAt} on {@link HoodieInternalRow} in two passes: first nulling a
 * single index (0 through 15) on a freshly built row, then running five rounds in which one
 * to four distinct, randomly chosen indices are nulled. After every set-up the row is handed
 * to {@code assertValues} together with the source values and the indices that were nulled.
 */
@Test
public void testIsNullCheck() {
    // Pass 1: a fresh row per index, with exactly that index set to null.
    for (int position = 0; position < 16; position++) {
        Object[] rowValues = getRandomValue(true);
        InternalRow backingRow = new GenericInternalRow(rowValues);
        HoodieInternalRow wrapped = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", backingRow);
        nullIndices.clear();
        nullIndices.add(position);
        wrapped.setNullAt(position);
        assertValues(wrapped, "commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", rowValues, nullIndices);
    }
    // Pass 2: run it for 5 rounds, each nulling several random indices at once.
    for (int round = 0; round < 5; round++) {
        int targetCount = 1 + RANDOM.nextInt(4);
        // Keep drawing until enough distinct indices have been collected; the indices are
        // drawn before the row values so the sequence of random calls stays unchanged.
        List<Integer> chosen = new ArrayList<>();
        while (chosen.size() < targetCount) {
            int candidate = RANDOM.nextInt(16);
            if (chosen.contains(candidate)) {
                continue;
            }
            chosen.add(candidate);
        }
        Object[] rowValues = getRandomValue(true);
        InternalRow backingRow = new GenericInternalRow(rowValues);
        HoodieInternalRow wrapped = new HoodieInternalRow("commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", backingRow);
        nullIndices.clear();
        for (Integer nullPosition : chosen) {
            nullIndices.add(nullPosition);
            wrapped.setNullAt(nullPosition);
        }
        assertValues(wrapped, "commitTime", "commitSeqNo", "recordKey", "partitionPath", "fileName", rowValues, nullIndices);
    }
}
Also used : GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow) ArrayList(java.util.ArrayList) InternalRow(org.apache.spark.sql.catalyst.InternalRow) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow) Test(org.junit.jupiter.api.Test)

Example 97 with InternalRow

use of org.apache.spark.sql.catalyst.InternalRow in project hudi by apache.

the class SparkDatasetTestUtils method serializeRow.

/**
 * Serializes a {@link Row} into an {@link InternalRow} using the supplied encoder.
 *
 * <p>The encoder is driven reflectively: when the Spark version string starts with
 * {@code "2."} the encoder's {@code toRow(Object)} method is invoked; otherwise a serializer
 * is obtained from the encoder's {@code createSerializer()} method and its
 * {@code apply(Object)} method is invoked.
 *
 * @param encoder encoder whose methods are looked up and invoked reflectively
 * @param row the row to serialize
 * @return the result of the reflective call, cast to {@link InternalRow}
 * @throws InvocationTargetException if a reflectively invoked method throws
 * @throws IllegalAccessException if a looked-up method is not accessible
 * @throws NoSuchMethodException if an expected method does not exist
 * @throws ClassNotFoundException if the serializer class cannot be loaded
 */
private static InternalRow serializeRow(ExpressionEncoder encoder, Row row) throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, ClassNotFoundException {
    // TODO remove reflection if Spark 2.x support is dropped
    boolean runningOnSpark2 = package$.MODULE$.SPARK_VERSION().startsWith("2.");
    if (!runningOnSpark2) {
        // Load the serializer class first, then create the serializer, then resolve apply().
        Class<?> serializerType = Class.forName("org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer");
        Object serializerInstance = encoder.getClass().getMethod("createSerializer").invoke(encoder);
        Method applyMethod = serializerType.getMethod("apply", Object.class);
        Object serialized = applyMethod.invoke(serializerInstance, row);
        return (InternalRow) serialized;
    }
    Method toRowMethod = encoder.getClass().getMethod("toRow", Object.class);
    Object serialized = toRowMethod.invoke(encoder, row);
    return (InternalRow) serialized;
}
Also used : Method(java.lang.reflect.Method) InternalRow(org.apache.spark.sql.catalyst.InternalRow) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow)

Example 98 with InternalRow

use of org.apache.spark.sql.catalyst.InternalRow in project hudi by apache.

the class SparkDatasetTestUtils method toInternalRows.

/**
 * Collects a {@code Dataset<Row>} and converts every collected {@link Row} into an
 * {@link InternalRow}.
 *
 * @param rows the {@code Dataset<Row>} whose rows are to be converted
 * @param encoder encoder handed to {@code serializeRow} for each row
 * @return a list containing, for each collected row in order, a copy of its serialized
 *         {@link InternalRow}
 * @throws Exception if serializing any row fails
 */
public static List<InternalRow> toInternalRows(Dataset<Row> rows, ExpressionEncoder encoder) throws Exception {
    List<InternalRow> converted = new ArrayList<>();
    for (Row current : rows.collectAsList()) {
        InternalRow serialized = serializeRow(encoder, current);
        // Store the result of copy() rather than the serialized row itself.
        converted.add(serialized.copy());
    }
    return converted;
}
Also used : ArrayList(java.util.ArrayList) InternalRow(org.apache.spark.sql.catalyst.InternalRow) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow) GenericRow(org.apache.spark.sql.catalyst.expressions.GenericRow) Row(org.apache.spark.sql.Row) InternalRow(org.apache.spark.sql.catalyst.InternalRow) GenericInternalRow(org.apache.spark.sql.catalyst.expressions.GenericInternalRow)

Example 99 with InternalRow

use of org.apache.spark.sql.catalyst.InternalRow in project hudi by apache.

the class TestComplexKeyGenerator method testMultipleValueKeyGenerator.

/**
 * Checks a {@link ComplexKeyGenerator} configured with two record-key fields
 * ({@code _row_key,timestamp}) and two partition-path fields ({@code rider,driver}): the
 * key produced from a generated Avro record is compared against an expected record key and
 * partition path built from that record, and the partition path is compared again when
 * derived from the equivalent {@link Row} and {@link InternalRow}.
 */
@Test
public void testMultipleValueKeyGenerator() {
    TypedProperties properties = new TypedProperties();
    properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key,timestamp");
    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "rider,driver");
    ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(2, compositeKeyGenerator.getRecordKeyFields().size());
    assertEquals(2, compositeKeyGenerator.getPartitionPathFields().size());
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
    GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
    // Expected record key: "<field><separator><value>" pairs joined with a comma.
    String rowKey = "_row_key" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + "," + "timestamp" + ComplexAvroKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString();
    // Expected partition path: the two partition field values joined with "/".
    String partitionPath = record.get("rider").toString() + "/" + record.get("driver").toString();
    HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
    assertEquals(rowKey, hoodieKey.getRecordKey());
    assertEquals(partitionPath, hoodieKey.getPartitionPath());
    Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA, AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
    Assertions.assertEquals(partitionPath, compositeKeyGenerator.getPartitionPath(row));
    InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
    Assertions.assertEquals(partitionPath, compositeKeyGenerator.getPartitionPath(internalRow, row.schema()));
}
Also used : HoodieKey(org.apache.hudi.common.model.HoodieKey) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Row(org.apache.spark.sql.Row) TypedProperties(org.apache.hudi.common.config.TypedProperties) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Test(org.junit.jupiter.api.Test)

Example 100 with InternalRow

use of org.apache.spark.sql.catalyst.InternalRow in project hudi by apache.

the class TestCustomKeyGenerator method testSimpleKeyGenerator.

/**
 * Builds a key generator from the given properties and checks that it yields record key
 * {@code "key1"} and partition path {@code "timestamp=4357686"} for the record returned by
 * {@code getRecord()}, via the Avro record and the equivalent {@link Row}; the partition
 * path is also checked via the {@link InternalRow}.
 *
 * @param props configuration passed to {@code HoodieSparkKeyGeneratorFactory.createKeyGenerator}
 * @throws IOException declared by the original signature; presumably raised while creating
 *         the key generator — verify against the factory
 */
public void testSimpleKeyGenerator(TypedProperties props) throws IOException {
    BuiltinKeyGenerator keyGenerator = (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
    GenericRecord record = getRecord();
    HoodieKey key = keyGenerator.getKey(record);
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    Assertions.assertEquals("key1", key.getRecordKey());
    Assertions.assertEquals("timestamp=4357686", key.getPartitionPath());
    Row row = KeyGeneratorTestUtilities.getRow(record);
    Assertions.assertEquals("key1", keyGenerator.getRecordKey(row));
    Assertions.assertEquals("timestamp=4357686", keyGenerator.getPartitionPath(row));
    InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
    Assertions.assertEquals("timestamp=4357686", keyGenerator.getPartitionPath(internalRow, row.schema()));
}
Also used : HoodieKey(org.apache.hudi.common.model.HoodieKey) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Row(org.apache.spark.sql.Row) GenericRecord(org.apache.avro.generic.GenericRecord) InternalRow(org.apache.spark.sql.catalyst.InternalRow)

Aggregations

InternalRow (org.apache.spark.sql.catalyst.InternalRow)110 GenericInternalRow (org.apache.spark.sql.catalyst.expressions.GenericInternalRow)33 Row (org.apache.spark.sql.Row)30 StructType (org.apache.spark.sql.types.StructType)29 Test (org.junit.Test)28 Schema (org.apache.iceberg.Schema)17 ArrayList (java.util.ArrayList)16 List (java.util.List)16 Test (org.junit.jupiter.api.Test)14 File (java.io.File)13 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)13 IOException (java.io.IOException)12 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)12 Types (org.apache.iceberg.types.Types)12 OutputFileFactory (org.apache.iceberg.io.OutputFileFactory)11 GenericRecord (org.apache.avro.generic.GenericRecord)10 HoodieKey (org.apache.hudi.common.model.HoodieKey)10 FileAppender (org.apache.iceberg.io.FileAppender)10 Map (java.util.Map)9 Assert (org.junit.Assert)9