
Example 81 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class RowDataToHoodieFunction method toHoodieRecord.

/**
 * Converts the given record to a {@link HoodieRecord}.
 *
 * @param record The input record
 * @return HoodieRecord based on the configuration
 * @throws Exception if the conversion fails
 */
@SuppressWarnings("rawtypes")
private HoodieRecord toHoodieRecord(I record) throws Exception {
    GenericRecord gr = (GenericRecord) this.converter.convert(this.avroSchema, record);
    final HoodieKey hoodieKey = keyGenerator.getKey(gr);
    HoodieRecordPayload payload = payloadCreation.createPayload(gr);
    // Preserve the Flink RowKind (insert / update-before / update-after / delete) as the Hoodie operation.
    HoodieOperation operation = HoodieOperation.fromValue(record.getRowKind().toByteValue());
    return new HoodieAvroRecord<>(hoodieKey, payload, operation);
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieOperation(org.apache.hudi.common.model.HoodieOperation) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)
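
For orientation, here is a minimal standalone sketch of the same construction outside any Flink operator. The schema, field names, values, and the choice of OverwriteWithLatestAvroPayload are hypothetical stand-ins for whatever the converter, key generator, and payloadCreation are configured to produce.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;

public class HoodieAvroRecordSketch {
    public static void main(String[] args) {
        // Hypothetical two-field Avro schema standing in for the table schema.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Trip\",\"fields\":["
                + "{\"name\":\"uuid\",\"type\":\"string\"},"
                + "{\"name\":\"ts\",\"type\":\"long\"}]}");
        GenericRecord gr = new GenericData.Record(schema);
        gr.put("uuid", "trip-001");
        gr.put("ts", 1700000000L);

        // Record key + partition path, normally produced by the key generator.
        HoodieKey key = new HoodieKey("trip-001", "2023/11/14");
        // Any HoodieRecordPayload works here; this one keeps the latest value per key.
        OverwriteWithLatestAvroPayload payload = new OverwriteWithLatestAvroPayload(gr, 0L);
        // The third argument tags the record with its change type, as in toHoodieRecord above.
        HoodieRecord<OverwriteWithLatestAvroPayload> record =
            new HoodieAvroRecord<>(key, payload, HoodieOperation.INSERT);
        System.out.println(record.getKey());
    }
}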

Example 82 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class MultipleSparkJobExecutionStrategy method transform.

/**
 * Transform IndexedRecord into HoodieRecord.
 */
private static <T> HoodieRecord<T> transform(IndexedRecord indexedRecord, HoodieWriteConfig writeConfig) {
    GenericRecord record = (GenericRecord) indexedRecord;
    Option<BaseKeyGenerator> keyGeneratorOpt = Option.empty();
    // Meta columns are disabled, so key and partition path must be recomputed with the configured key generator.
    if (!writeConfig.populateMetaFields()) {
        try {
            keyGeneratorOpt = Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(writeConfig.getProps()));
        } catch (IOException e) {
            throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled", e);
        }
    }
    // With an empty keyGeneratorOpt, KeyGenUtils falls back to the _hoodie_record_key / _hoodie_partition_path meta fields.
    String key = KeyGenUtils.getRecordKeyFromGenericRecord(record, keyGeneratorOpt);
    String partition = KeyGenUtils.getPartitionPathFromGenericRecord(record, keyGeneratorOpt);
    HoodieKey hoodieKey = new HoodieKey(key, partition);
    HoodieRecordPayload avroPayload = new RewriteAvroPayload(record);
    HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload);
    return hoodieRecord;
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator)
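
When populateMetaFields() returns true, keyGeneratorOpt stays empty and KeyGenUtils reads the key and partition path from the Hudi meta columns instead. A minimal sketch of that branch in isolation, assuming the input record still carries its _hoodie_* meta fields from an earlier write:

import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.RewriteAvroPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.keygen.KeyGenUtils;

// Sketch: rebuild a HoodieAvroRecord purely from the meta columns (no key generator).
static HoodieRecord<RewriteAvroPayload> fromMetaFields(GenericRecord record) {
    String key = KeyGenUtils.getRecordKeyFromGenericRecord(record, Option.empty());
    String partition = KeyGenUtils.getPartitionPathFromGenericRecord(record, Option.empty());
    return new HoodieAvroRecord<>(new HoodieKey(key, partition), new RewriteAvroPayload(record));
}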

Example 83 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class SingleSparkJobExecutionStrategy method transform.

/**
 * Transform IndexedRecord into HoodieRecord.
 */
private HoodieRecord<T> transform(IndexedRecord indexedRecord) {
    GenericRecord record = (GenericRecord) indexedRecord;
    Option<BaseKeyGenerator> keyGeneratorOpt = Option.empty();
    if (!getWriteConfig().populateMetaFields()) {
        try {
            keyGeneratorOpt = Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(getWriteConfig().getProps())));
        } catch (IOException e) {
            throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled", e);
        }
    }
    String key = KeyGenUtils.getRecordKeyFromGenericRecord(record, keyGeneratorOpt);
    String partition = KeyGenUtils.getPartitionPathFromGenericRecord(record, keyGeneratorOpt);
    HoodieKey hoodieKey = new HoodieKey(key, partition);
    HoodieRecordPayload avroPayload = new RewriteAvroPayload(record);
    HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload);
    return hoodieRecord;
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator)
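
The only difference from Example 82 is that the raw properties are wrapped in TypedProperties before reaching the factory. A minimal sketch of building such a key generator directly; the record key and partition path field names are hypothetical and must match the table schema:

import java.io.IOException;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;

static BaseKeyGenerator buildKeyGenerator() throws IOException {
    TypedProperties props = new TypedProperties();
    // Hypothetical field names; these must exist in the table's Avro schema.
    props.put("hoodie.datasource.write.recordkey.field", "uuid");
    props.put("hoodie.datasource.write.partitionpath.field", "partition_path");
    // The factory picks the configured key generator class (SimpleKeyGenerator by default).
    return (BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
}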

Example 84 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class RDDSpatialCurveSortPartitioner method repartitionRecords.

@Override
public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
    // Wrap the Avro schema so it serializes cleanly with the Spark closures below.
    SerializableSchema serializableSchema = new SerializableSchema(schema);
    // Materialize each payload back into an Avro record; get() assumes every payload resolves to a value.
    JavaRDD<GenericRecord> genericRecordsRDD = records.map(f -> (GenericRecord) f.getData().getInsertValue(serializableSchema.get()).get());
    // Round-trip through a DataFrame so the spatial-curve reordering can run as a Dataset operation.
    Dataset<Row> sourceDataset = AvroConversionUtils.createDataFrame(genericRecordsRDD.rdd(), schema.toString(), sparkEngineContext.getSqlContext().sparkSession());
    Dataset<Row> sortedDataset = reorder(sourceDataset, outputSparkPartitions);
    // Convert back to Avro and rebuild each record's key from the preserved meta columns.
    return HoodieSparkUtils.createRdd(sortedDataset, schema.getName(), schema.getNamespace(), false, Option.empty()).toJavaRDD().map(record -> {
        String key = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
        String partition = record.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
        HoodieKey hoodieKey = new HoodieKey(key, partition);
        HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, new RewriteAvroPayload(record));
        return hoodieRecord;
    });
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) Row(org.apache.spark.sql.Row) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) SerializableSchema(org.apache.hudi.common.config.SerializableSchema)
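
One caveat in the first map above: getInsertValue(...) returns an Option, and the unconditional get() will throw if any payload resolves to empty (as delete payloads do). A defensive variant, sketched under the assumption that such records should simply be skipped:

    // Drop-in replacement for the genericRecordsRDD line above (extra imports:
    // java.util.Collections, org.apache.avro.generic.IndexedRecord).
    JavaRDD<GenericRecord> genericRecordsRDD = records.flatMap(f -> {
        Option<IndexedRecord> insertValue = f.getData().getInsertValue(serializableSchema.get());
        return insertValue.isPresent()
            ? Collections.singletonList((GenericRecord) insertValue.get()).iterator()
            : Collections.<GenericRecord>emptyIterator();
    });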

Aggregations

HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 84 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 72 usages
HoodieKey (org.apache.hudi.common.model.HoodieKey): 68 usages
ArrayList (java.util.ArrayList): 38 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 37 usages
RawTripTestPayload (org.apache.hudi.common.testutils.RawTripTestPayload): 31 usages
Test (org.junit.jupiter.api.Test): 30 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 29 usages
Path (org.apache.hadoop.fs.Path): 26 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 25 usages
IOException (java.io.IOException): 24 usages
HoodieTable (org.apache.hudi.table.HoodieTable): 24 usages
List (java.util.List): 23 usages
Schema (org.apache.avro.Schema): 23 usages
HashMap (java.util.HashMap): 22 usages
Pair (org.apache.hudi.common.util.collection.Pair): 21 usages
Map (java.util.Map): 20 usages
Collectors (java.util.stream.Collectors): 20 usages
Arrays (java.util.Arrays): 17 usages
Option (org.apache.hudi.common.util.Option): 16 usages