Use of org.apache.hudi.util.StringToRowDataConverter in project Hudi by Apache.
From the class TestStringToRowDataConverter, method testRowDataToAvroStringToRowData:
@Test
void testRowDataToAvroStringToRowData() {
  GenericRowData rowData = new GenericRowData(7);
  rowData.setField(0, 1.1f);
  rowData.setField(1, 3.4D);
  rowData.setField(2, (int) LocalDate.parse("2021-03-30").toEpochDay());
  rowData.setField(3, LocalTime.parse("15:44:29").get(ChronoField.MILLI_OF_DAY));
  rowData.setField(4, TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29Z")));
  rowData.setField(5, TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29.666111Z")));
  rowData.setField(6, DecimalData.fromBigDecimal(new BigDecimal("12345.67"), 7, 2));
  DataType dataType = DataTypes.ROW(
      DataTypes.FIELD("f_float", DataTypes.FLOAT()),
      DataTypes.FIELD("f_double", DataTypes.DOUBLE()),
      DataTypes.FIELD("f_date", DataTypes.DATE()),
      DataTypes.FIELD("f_time", DataTypes.TIME(3)),
      DataTypes.FIELD("f_timestamp", DataTypes.TIMESTAMP(3)),
      DataTypes.FIELD("f_timestamp_micros", DataTypes.TIMESTAMP(6)),
      DataTypes.FIELD("f_decimal", DataTypes.DECIMAL(7, 2)));
  RowType rowType = (RowType) dataType.getLogicalType();
  RowDataToAvroConverters.RowDataToAvroConverter converter =
      RowDataToAvroConverters.createConverter(rowType);
  GenericRecord avroRecord =
      (GenericRecord) converter.convert(AvroSchemaConverter.convertToSchema(rowType), rowData);
  StringToRowDataConverter stringToRowDataConverter =
      new StringToRowDataConverter(rowType.getChildren().toArray(new LogicalType[0]));
  final String recordKey = KeyGenUtils.getRecordKey(avroRecord, rowType.getFieldNames(), false);
  final String[] recordKeys = KeyGenUtils.extractRecordKeys(recordKey);
  Object[] convertedKeys = stringToRowDataConverter.convert(recordKeys);
  GenericRowData converted = new GenericRowData(7);
  for (int i = 0; i < 7; i++) {
    converted.setField(i, convertedKeys[i]);
  }
  assertThat(converted, is(rowData));
}
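For context, the round trip above funnels every field through Hudi's composite record-key string before StringToRowDataConverter rebuilds the typed values. A minimal sketch of that intermediate step (the exact "name:value" layout of the key is an assumption based on Hudi's complex-key convention, and the class name is ours; in the test the real key is produced by KeyGenUtils.getRecordKey from the Avro record):

import org.apache.hudi.keygen.KeyGenUtils;

public class RecordKeyRoundTripSketch {
  public static void main(String[] args) {
    // Hypothetical composite key in the assumed "name:value,name:value" form.
    String recordKey = "f_float:1.1,f_double:3.4";
    String[] parts = KeyGenUtils.extractRecordKeys(recordKey);
    // Only the value parts survive the split: ["1.1", "3.4"]. These are the
    // strings StringToRowDataConverter maps back onto typed RowData fields.
    System.out.println(String.join(" | ", parts));
  }
}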
Use of org.apache.hudi.util.StringToRowDataConverter in project Hudi by Apache.
From the class MergeOnReadInputFormat, method getLogFileIterator:
private ClosableIterator<RowData> getLogFileIterator(MergeOnReadInputSplit split) {
  final Schema tableSchema = new Schema.Parser().parse(tableState.getAvroSchema());
  final Schema requiredSchema = new Schema.Parser().parse(tableState.getRequiredAvroSchema());
  final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(requiredSchema);
  final AvroToRowDataConverters.AvroToRowDataConverter avroToRowDataConverter =
      AvroToRowDataConverters.createRowConverter(tableState.getRequiredRowType());
  final HoodieMergedLogRecordScanner scanner =
      FormatUtils.logScanner(split, tableSchema, hadoopConf, conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED));
  final Iterator<String> logRecordsKeyIterator = scanner.getRecords().keySet().iterator();
  final int[] pkOffset = tableState.getPkOffsetsInRequired();
  // flag saying whether the pk semantics have been dropped by user-specified
  // projections, e.g., if the pk fields are [a, b] but the user only selects a,
  // then the pk semantics are lost.
  final boolean pkSemanticLost = Arrays.stream(pkOffset).anyMatch(offset -> offset == -1);
  final LogicalType[] pkTypes = pkSemanticLost ? null : tableState.getPkTypes(pkOffset);
  final StringToRowDataConverter converter = pkSemanticLost ? null : new StringToRowDataConverter(pkTypes);

  return new ClosableIterator<RowData>() {
    private RowData currentRecord;

    @Override
    public boolean hasNext() {
      while (logRecordsKeyIterator.hasNext()) {
        String curAvroKey = logRecordsKeyIterator.next();
        Option<IndexedRecord> curAvroRecord = null;
        final HoodieAvroRecord<?> hoodieRecord = (HoodieAvroRecord) scanner.getRecords().get(curAvroKey);
        try {
          curAvroRecord = hoodieRecord.getData().getInsertValue(tableSchema);
        } catch (IOException e) {
          throw new HoodieException("Get avro insert value error for key: " + curAvroKey, e);
        }
        if (!curAvroRecord.isPresent()) {
          // delete record found
          if (emitDelete && !pkSemanticLost) {
            GenericRowData delete = new GenericRowData(tableState.getRequiredRowType().getFieldCount());
            final String recordKey = hoodieRecord.getRecordKey();
            final String[] pkFields = KeyGenUtils.extractRecordKeys(recordKey);
            final Object[] converted = converter.convert(pkFields);
            for (int i = 0; i < pkOffset.length; i++) {
              delete.setField(pkOffset[i], converted[i]);
            }
            delete.setRowKind(RowKind.DELETE);
            this.currentRecord = delete;
            return true;
          }
          // skipping if the condition is unsatisfied
          // continue;
        } else {
          final IndexedRecord avroRecord = curAvroRecord.get();
          final RowKind rowKind = FormatUtils.getRowKindSafely(avroRecord, tableState.getOperationPos());
          if (rowKind == RowKind.DELETE && !emitDelete) {
            // skip the delete record
            continue;
          }
          GenericRecord requiredAvroRecord =
              buildAvroRecordBySchema(avroRecord, requiredSchema, requiredPos, recordBuilder);
          currentRecord = (RowData) avroToRowDataConverter.convert(requiredAvroRecord);
          currentRecord.setRowKind(rowKind);
          return true;
        }
      }
      return false;
    }

    @Override
    public RowData next() {
      return currentRecord;
    }

    @Override
    public void close() {
      scanner.close();
    }
  };
}
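The delete branch above is where StringToRowDataConverter earns its keep: a delete record carries no Avro payload, so the emitted row must be rebuilt from the record-key string alone. A condensed sketch of that step as a standalone helper (the method name is ours; it assumes the converter and pk offsets prepared in the surrounding method):

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.types.RowKind;
import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.util.StringToRowDataConverter;

// Hypothetical helper mirroring the delete branch: rebuild a DELETE row whose
// only populated fields are the primary keys recovered from the key string.
public static GenericRowData deleteRow(
    String recordKey, int fieldCount, int[] pkOffset, StringToRowDataConverter converter) {
  GenericRowData delete = new GenericRowData(fieldCount);
  String[] pkFields = KeyGenUtils.extractRecordKeys(recordKey);
  Object[] converted = converter.convert(pkFields);
  for (int i = 0; i < pkOffset.length; i++) {
    delete.setField(pkOffset[i], converted[i]); // non-pk positions stay null
  }
  delete.setRowKind(RowKind.DELETE);
  return delete;
}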
Use of org.apache.hudi.util.StringToRowDataConverter in project Hudi by Apache.
From the class TestStringToRowDataConverter, method testConvert:
@Test
void testConvert() {
  String[] fields = new String[] {
      "1.1", "3.4", "2021-03-30", "56669000",
      "1617119069000", "1617119069666111", "12345.67"
  };
  LogicalType[] fieldTypes = new LogicalType[] {
      DataTypes.FLOAT().getLogicalType(),
      DataTypes.DOUBLE().getLogicalType(),
      DataTypes.DATE().getLogicalType(),
      DataTypes.TIME(3).getLogicalType(),
      DataTypes.TIMESTAMP(3).getLogicalType(),
      DataTypes.TIMESTAMP(6).getLogicalType(),
      DataTypes.DECIMAL(7, 2).getLogicalType()
  };
  StringToRowDataConverter converter = new StringToRowDataConverter(fieldTypes);
  Object[] converted = converter.convert(fields);
  Object[] expected = new Object[] {
      1.1f,
      3.4D,
      (int) LocalDate.parse("2021-03-30").toEpochDay(),
      LocalTime.parse("15:44:29").get(ChronoField.MILLI_OF_DAY),
      TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29Z")),
      TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29.666111Z")),
      DecimalData.fromBigDecimal(new BigDecimal("12345.67"), 7, 2)
  };
  assertArrayEquals(expected, converted);
}
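Reading the fixture off, the converter evidently expects type-specific string encodings: plain numeric literals for FLOAT, DOUBLE, and DECIMAL; an ISO-8601 date for DATE; milliseconds of day for TIME(3) ("56669000" is 15:44:29); epoch milliseconds for TIMESTAMP(3); and epoch microseconds for TIMESTAMP(6). A minimal standalone check of the TIMESTAMP(6) case (a sketch of ours, not part of the test suite):

import java.time.Instant;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.hudi.util.StringToRowDataConverter;

public class TimestampEncodingSketch {
  public static void main(String[] args) {
    LogicalType[] types = new LogicalType[] { DataTypes.TIMESTAMP(6).getLogicalType() };
    // "1617119069666111" is epoch microseconds for 2021-03-30T15:44:29.666111Z.
    Object[] converted = new StringToRowDataConverter(types)
        .convert(new String[] { "1617119069666111" });
    TimestampData expected = TimestampData.fromInstant(Instant.parse("2021-03-30T15:44:29.666111Z"));
    System.out.println(expected.equals(converted[0])); // expected: true
  }
}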