Search in sources :

Example 1 with StringToRowDataConverter

Usage of org.apache.hudi.util.StringToRowDataConverter in the Apache Hudi project.

From the class TestStringToRowDataConverter, method testRowDataToAvroStringToRowData.

@Test
void testRowDataToAvroStringToRowData() {
    // Source values covering every type the converter round-trips:
    // float, double, date, time(3), timestamp(3), timestamp(6), decimal(7, 2).
    final Object[] fieldValues = new Object[] {
        1.1f,
        3.4D,
        (int) LocalDate.parse("2021-03-30").toEpochDay(),
        LocalTime.parse("15:44:29").get(ChronoField.MILLI_OF_DAY),
        TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29Z")),
        TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29.666111Z")),
        DecimalData.fromBigDecimal(new BigDecimal("12345.67"), 7, 2)
    };
    GenericRowData rowData = new GenericRowData(fieldValues.length);
    for (int i = 0; i < fieldValues.length; i++) {
        rowData.setField(i, fieldValues[i]);
    }
    // Row schema matching the values above, field by field.
    DataType dataType = DataTypes.ROW(
        DataTypes.FIELD("f_float", DataTypes.FLOAT()),
        DataTypes.FIELD("f_double", DataTypes.DOUBLE()),
        DataTypes.FIELD("f_date", DataTypes.DATE()),
        DataTypes.FIELD("f_time", DataTypes.TIME(3)),
        DataTypes.FIELD("f_timestamp", DataTypes.TIMESTAMP(3)),
        DataTypes.FIELD("f_timestamp_micros", DataTypes.TIMESTAMP(6)),
        DataTypes.FIELD("f_decimal", DataTypes.DECIMAL(7, 2)));
    RowType rowType = (RowType) dataType.getLogicalType();

    // RowData -> Avro record.
    RowDataToAvroConverters.RowDataToAvroConverter toAvro =
        RowDataToAvroConverters.createConverter(rowType);
    GenericRecord avroRecord =
        (GenericRecord) toAvro.convert(AvroSchemaConverter.convertToSchema(rowType), rowData);

    // Avro record -> record-key string -> individual key strings -> typed values.
    final String recordKey = KeyGenUtils.getRecordKey(avroRecord, rowType.getFieldNames(), false);
    final String[] recordKeys = KeyGenUtils.extractRecordKeys(recordKey);
    StringToRowDataConverter backToRow =
        new StringToRowDataConverter(rowType.getChildren().toArray(new LogicalType[0]));
    Object[] convertedValues = backToRow.convert(recordKeys);

    // Reassemble a row from the converted values; it must equal the original.
    GenericRowData roundTripped = new GenericRowData(convertedValues.length);
    for (int i = 0; i < convertedValues.length; i++) {
        roundTripped.setField(i, convertedValues[i]);
    }
    assertThat(roundTripped, is(rowData));
}
Also used : RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) StringToRowDataConverter(org.apache.hudi.util.StringToRowDataConverter) BigDecimal(java.math.BigDecimal) RowDataToAvroConverters(org.apache.hudi.util.RowDataToAvroConverters) GenericRowData(org.apache.flink.table.data.GenericRowData) DataType(org.apache.flink.table.types.DataType) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.jupiter.api.Test)

Example 2 with StringToRowDataConverter

Usage of org.apache.hudi.util.StringToRowDataConverter in the Apache Hudi project.

From the class MergeOnReadInputFormat, method getLogFileIterator.

/**
 * Builds an iterator over the merged log records of the given split, converting each
 * Avro record into a Flink {@code RowData} projected to the required schema.
 *
 * <p>Records without an insert value (tombstones) are emitted as DELETE rows populated
 * only with the primary-key fields recovered from the record key — and only when
 * {@code emitDelete} is set and the pk fields survived the user's projection; otherwise
 * tombstones are skipped.
 *
 * @param split the merge-on-read input split whose log files are scanned
 * @return a closable iterator of {@code RowData}; closing it closes the underlying scanner
 */
private ClosableIterator<RowData> getLogFileIterator(MergeOnReadInputSplit split) {
    final Schema tableSchema = new Schema.Parser().parse(tableState.getAvroSchema());
    final Schema requiredSchema = new Schema.Parser().parse(tableState.getRequiredAvroSchema());
    final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(requiredSchema);
    final AvroToRowDataConverters.AvroToRowDataConverter avroToRowDataConverter = AvroToRowDataConverters.createRowConverter(tableState.getRequiredRowType());
    // Scanner that merges this split's log blocks into a key -> record view.
    final HoodieMergedLogRecordScanner scanner = FormatUtils.logScanner(split, tableSchema, hadoopConf, conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED));
    final Iterator<String> logRecordsKeyIterator = scanner.getRecords().keySet().iterator();
    final int[] pkOffset = tableState.getPkOffsetsInRequired();
    // flag saying whether the pk semantics has been dropped by user specified
    // projections. For e.g, if the pk fields are [a, b] but user only select a,
    // then the pk semantics is lost.
    final boolean pkSemanticLost = Arrays.stream(pkOffset).anyMatch(offset -> offset == -1);
    final LogicalType[] pkTypes = pkSemanticLost ? null : tableState.getPkTypes(pkOffset);
    // Converts record-key strings back into typed pk field values; unused (null)
    // when the pk semantics is lost, guarded by the emitDelete branch below.
    final StringToRowDataConverter converter = pkSemanticLost ? null : new StringToRowDataConverter(pkTypes);
    return new ClosableIterator<RowData>() {

        // Record staged by hasNext() and handed out by next().
        // NOTE(review): hasNext() advances the underlying key iterator, so it is
        // not idempotent — each hasNext() must be paired with exactly one next().
        private RowData currentRecord;

        @Override
        public boolean hasNext() {
            while (logRecordsKeyIterator.hasNext()) {
                String curAvroKey = logRecordsKeyIterator.next();
                Option<IndexedRecord> curAvroRecord = null;
                final HoodieAvroRecord<?> hoodieRecord = (HoodieAvroRecord) scanner.getRecords().get(curAvroKey);
                try {
                    curAvroRecord = hoodieRecord.getData().getInsertValue(tableSchema);
                } catch (IOException e) {
                    throw new HoodieException("Get avro insert value error for key: " + curAvroKey, e);
                }
                if (!curAvroRecord.isPresent()) {
                    // delete record found
                    if (emitDelete && !pkSemanticLost) {
                        // Rebuild only the pk fields from the record key; all other
                        // fields of the DELETE row stay null.
                        GenericRowData delete = new GenericRowData(tableState.getRequiredRowType().getFieldCount());
                        final String recordKey = hoodieRecord.getRecordKey();
                        final String[] pkFields = KeyGenUtils.extractRecordKeys(recordKey);
                        final Object[] converted = converter.convert(pkFields);
                        for (int i = 0; i < pkOffset.length; i++) {
                            delete.setField(pkOffset[i], converted[i]);
                        }
                        delete.setRowKind(RowKind.DELETE);
                        this.currentRecord = delete;
                        return true;
                    }
                // skipping if the condition is unsatisfied
                // continue;
                } else {
                    final IndexedRecord avroRecord = curAvroRecord.get();
                    final RowKind rowKind = FormatUtils.getRowKindSafely(avroRecord, tableState.getOperationPos());
                    if (rowKind == RowKind.DELETE && !emitDelete) {
                        // skip the delete record
                        continue;
                    }
                    // Project to the required schema, then convert to RowData and
                    // carry over the change-log row kind.
                    GenericRecord requiredAvroRecord = buildAvroRecordBySchema(avroRecord, requiredSchema, requiredPos, recordBuilder);
                    currentRecord = (RowData) avroToRowDataConverter.convert(requiredAvroRecord);
                    currentRecord.setRowKind(rowKind);
                    return true;
                }
            }
            return false;
        }

        @Override
        public RowData next() {
            // NOTE(review): returns the record staged by the last hasNext(); does not
            // throw NoSuchElementException when exhausted — callers must check hasNext().
            return currentRecord;
        }

        @Override
        public void close() {
            scanner.close();
        }
    };
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) FormatUtils.buildAvroRecordBySchema(org.apache.hudi.table.format.FormatUtils.buildAvroRecordBySchema) Schema(org.apache.avro.Schema) LogicalType(org.apache.flink.table.types.logical.LogicalType) StringToRowDataConverter(org.apache.hudi.util.StringToRowDataConverter) HoodieException(org.apache.hudi.exception.HoodieException) AvroToRowDataConverters(org.apache.hudi.util.AvroToRowDataConverters) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRowData(org.apache.flink.table.data.GenericRowData) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) IOException(java.io.IOException) RowKind(org.apache.flink.types.RowKind)

Example 3 with StringToRowDataConverter

Usage of org.apache.hudi.util.StringToRowDataConverter in the Apache Hudi project.

From the class TestStringToRowDataConverter, method testConvert.

@Test
void testConvert() {
    // Field values as they appear string-encoded inside a Hudi record key.
    String[] encoded = new String[] {
        "1.1", "3.4", "2021-03-30", "56669000", "1617119069000", "1617119069666111", "12345.67"
    };
    // One logical type per encoded value, in the same order.
    LogicalType[] types = new LogicalType[] {
        DataTypes.FLOAT().getLogicalType(),
        DataTypes.DOUBLE().getLogicalType(),
        DataTypes.DATE().getLogicalType(),
        DataTypes.TIME(3).getLogicalType(),
        DataTypes.TIMESTAMP(3).getLogicalType(),
        DataTypes.TIMESTAMP(6).getLogicalType(),
        DataTypes.DECIMAL(7, 2).getLogicalType()
    };
    Object[] actual = new StringToRowDataConverter(types).convert(encoded);
    // Typed values mirroring the encoded strings above.
    Object[] expected = new Object[] {
        1.1f,
        3.4D,
        (int) LocalDate.parse("2021-03-30").toEpochDay(),
        LocalTime.parse("15:44:29").get(ChronoField.MILLI_OF_DAY),
        TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29Z")),
        TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29.666111Z")),
        DecimalData.fromBigDecimal(new BigDecimal("12345.67"), 7, 2)
    };
    assertArrayEquals(expected, actual);
}
Also used : LogicalType(org.apache.flink.table.types.logical.LogicalType) StringToRowDataConverter(org.apache.hudi.util.StringToRowDataConverter) BigDecimal(java.math.BigDecimal) Test(org.junit.jupiter.api.Test)

Aggregations

LogicalType (org.apache.flink.table.types.logical.LogicalType)3 StringToRowDataConverter (org.apache.hudi.util.StringToRowDataConverter)3 BigDecimal (java.math.BigDecimal)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 GenericRowData (org.apache.flink.table.data.GenericRowData)2 Test (org.junit.jupiter.api.Test)2 IOException (java.io.IOException)1 Schema (org.apache.avro.Schema)1 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 RowData (org.apache.flink.table.data.RowData)1 DataType (org.apache.flink.table.types.DataType)1 RowType (org.apache.flink.table.types.logical.RowType)1 RowKind (org.apache.flink.types.RowKind)1 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)1 HoodieMergedLogRecordScanner (org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner)1 ClosableIterator (org.apache.hudi.common.util.ClosableIterator)1 HoodieException (org.apache.hudi.exception.HoodieException)1 FormatUtils.buildAvroRecordBySchema (org.apache.hudi.table.format.FormatUtils.buildAvroRecordBySchema)1 AvroToRowDataConverters (org.apache.hudi.util.AvroToRowDataConverters)1