Search in sources:

Example 1 with RewriteAvroPayload

Use of org.apache.hudi.common.model.RewriteAvroPayload in project hudi by apache.

From class JavaExecutionStrategy, method transform.

/**
 * Wraps an Avro {@link IndexedRecord} in a {@link HoodieRecord} whose payload is a
 * {@link RewriteAvroPayload} carrying the record unchanged.
 *
 * <p>The record key and partition path are read straight from the generic record with an
 * empty key-generator option, i.e. the meta columns are expected to be present.
 *
 * @param indexedRecord the Avro record to convert
 * @return a {@link HoodieAvroRecord} keyed by the record's key and partition path
 */
private HoodieRecord<T> transform(IndexedRecord indexedRecord) {
    GenericRecord genericRecord = (GenericRecord) indexedRecord;
    // No key generator is supplied; keys come from the record itself.
    Option<BaseKeyGenerator> keyGenerator = Option.empty();
    HoodieKey hoodieKey = new HoodieKey(
        KeyGenUtils.getRecordKeyFromGenericRecord(genericRecord, keyGenerator),
        KeyGenUtils.getPartitionPathFromGenericRecord(genericRecord, keyGenerator));
    HoodieRecordPayload payload = new RewriteAvroPayload(genericRecord);
    return new HoodieAvroRecord(hoodieKey, payload);
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator)

Example 2 with RewriteAvroPayload

Use of org.apache.hudi.common.model.RewriteAvroPayload in project hudi by apache.

From class MultipleSparkJobExecutionStrategy, method transform.

/**
 * Wraps an Avro {@link IndexedRecord} in a {@link HoodieRecord} backed by a
 * {@link RewriteAvroPayload}.
 *
 * <p>When meta fields are disabled in {@code writeConfig}, a {@link BaseKeyGenerator} is
 * created from the write config's properties and used to derive the record key and
 * partition path; otherwise both are read from the record's meta columns.
 *
 * @param indexedRecord the Avro record to convert
 * @param writeConfig   write config used to decide how keys are generated
 * @return a {@link HoodieAvroRecord} keyed by the derived key and partition path
 * @throws HoodieIOException if the key generator cannot be instantiated
 */
private static <T> HoodieRecord<T> transform(IndexedRecord indexedRecord, HoodieWriteConfig writeConfig) {
    GenericRecord genericRecord = (GenericRecord) indexedRecord;
    Option<BaseKeyGenerator> keyGenerator = Option.empty();
    if (!writeConfig.populateMetaFields()) {
        // Meta columns absent: derive keys via a key generator built from the write config.
        try {
            keyGenerator = Option.of(
                (BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(writeConfig.getProps()));
        } catch (IOException e) {
            throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled ", e);
        }
    }
    String recordKey = KeyGenUtils.getRecordKeyFromGenericRecord(genericRecord, keyGenerator);
    String partitionPath = KeyGenUtils.getPartitionPathFromGenericRecord(genericRecord, keyGenerator);
    HoodieRecordPayload payload = new RewriteAvroPayload(genericRecord);
    return new HoodieAvroRecord(new HoodieKey(recordKey, partitionPath), payload);
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator)

Example 3 with RewriteAvroPayload

Use of org.apache.hudi.common.model.RewriteAvroPayload in project hudi by apache.

From class SingleSparkJobExecutionStrategy, method transform.

/**
 * Wraps an Avro {@link IndexedRecord} in a {@link HoodieRecord} backed by a
 * {@link RewriteAvroPayload}.
 *
 * <p>When the write config has meta fields disabled, a {@link BaseKeyGenerator} is built
 * from the config's properties (wrapped in {@link TypedProperties}) and used to derive the
 * record key and partition path; otherwise both come from the record's meta columns.
 *
 * @param indexedRecord the Avro record to convert
 * @return a {@link HoodieAvroRecord} keyed by the derived key and partition path
 * @throws HoodieIOException if the key generator cannot be instantiated
 */
private HoodieRecord<T> transform(IndexedRecord indexedRecord) {
    GenericRecord genericRecord = (GenericRecord) indexedRecord;
    Option<BaseKeyGenerator> keyGenerator = Option.empty();
    if (!getWriteConfig().populateMetaFields()) {
        // Meta columns absent: derive keys via a key generator built from the write config.
        try {
            keyGenerator = Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(
                new TypedProperties(getWriteConfig().getProps())));
        } catch (IOException e) {
            throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled ", e);
        }
    }
    String recordKey = KeyGenUtils.getRecordKeyFromGenericRecord(genericRecord, keyGenerator);
    String partitionPath = KeyGenUtils.getPartitionPathFromGenericRecord(genericRecord, keyGenerator);
    HoodieRecordPayload payload = new RewriteAvroPayload(genericRecord);
    return new HoodieAvroRecord(new HoodieKey(recordKey, partitionPath), payload);
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator)

Example 4 with RewriteAvroPayload

Use of org.apache.hudi.common.model.RewriteAvroPayload in project hudi by apache.

From class RDDSpatialCurveSortPartitioner, method repartitionRecords.

/**
 * Re-sorts the incoming records along a spatial curve and repartitions them.
 *
 * <p>Pipeline: extract each record's Avro payload, build a {@code Dataset<Row>} from the
 * resulting RDD, apply {@code reorder} to sort into {@code outputSparkPartitions}
 * partitions, then convert the rows back into {@link HoodieAvroRecord}s. Keys are
 * reconstructed from the meta columns ({@code RECORD_KEY_METADATA_FIELD} and
 * {@code PARTITION_PATH_METADATA_FIELD}) of each row.
 *
 * @param records               the records to reorder
 * @param outputSparkPartitions target number of Spark partitions
 * @return the sorted and repartitioned records
 */
@Override
public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
    SerializableSchema serializableSchema = new SerializableSchema(schema);
    // Unwrap each HoodieRecord into its Avro payload so it can be turned into a DataFrame.
    JavaRDD<GenericRecord> avroRecords = records.map(
        hoodieRecord -> (GenericRecord) hoodieRecord.getData().getInsertValue(serializableSchema.get()).get());
    Dataset<Row> source = AvroConversionUtils.createDataFrame(
        avroRecords.rdd(), schema.toString(), sparkEngineContext.getSqlContext().sparkSession());
    Dataset<Row> sorted = reorder(source, outputSparkPartitions);
    // Convert the sorted rows back to Avro and rebuild HoodieRecords from the meta columns.
    return HoodieSparkUtils
        .createRdd(sorted, schema.getName(), schema.getNamespace(), false, Option.empty())
        .toJavaRDD()
        .map(avroRecord -> {
            HoodieKey hoodieKey = new HoodieKey(
                avroRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
                avroRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString());
            HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, new RewriteAvroPayload(avroRecord));
            return hoodieRecord;
        });
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) Row(org.apache.spark.sql.Row) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) GenericRecord(org.apache.avro.generic.GenericRecord) SerializableSchema(org.apache.hudi.common.config.SerializableSchema)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord)4 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)4 HoodieKey (org.apache.hudi.common.model.HoodieKey)4 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)4 RewriteAvroPayload (org.apache.hudi.common.model.RewriteAvroPayload)4 HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload)3 BaseKeyGenerator (org.apache.hudi.keygen.BaseKeyGenerator)3 IOException (java.io.IOException)2 HoodieIOException (org.apache.hudi.exception.HoodieIOException)2 SerializableSchema (org.apache.hudi.common.config.SerializableSchema)1 TypedProperties (org.apache.hudi.common.config.TypedProperties)1 Row (org.apache.spark.sql.Row)1