Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
Class HoodieTestDataGenerator, method generateSameKeyInserts:
public List<HoodieRecord> generateSameKeyInserts(String instantTime, List<HoodieRecord> origin) throws IOException {
  List<HoodieRecord> copy = new ArrayList<>();
  for (HoodieRecord r : origin) {
    HoodieKey key = r.getKey();
    HoodieRecord record = new HoodieAvroRecord(key, generateRandomValue(key, instantTime));
    copy.add(record);
  }
  return copy;
}
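A minimal usage sketch for this helper, assuming the standard HoodieTestDataGenerator API (generateInserts) and arbitrary instant times; variable names are illustrative, not taken from the project:

  // Hypothetical snippet: create a first batch, then new payloads that reuse the same keys,
  // e.g. to exercise de-duplication or update code paths.
  HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
  List<HoodieRecord> firstBatch = dataGen.generateInserts("001", 10);
  List<HoodieRecord> sameKeyBatch = dataGen.generateSameKeyInserts("002", firstBatch);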
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
Class HoodieTestDataGenerator, method generateInsertsWithHoodieAvroPayload:
public List<HoodieRecord> generateInsertsWithHoodieAvroPayload(String instantTime, int limit) {
  List<HoodieRecord> inserts = new ArrayList<>();
  int currSize = getNumExistingKeys(TRIP_EXAMPLE_SCHEMA);
  for (int i = 0; i < limit; i++) {
    String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)];
    HoodieKey key = new HoodieKey(genPseudoRandomUUID(rand).toString(), partitionPath);
    HoodieRecord record = new HoodieAvroRecord(key, generateAvroPayload(key, instantTime));
    inserts.add(record);
    KeyPartition kp = new KeyPartition();
    kp.key = key;
    kp.partitionPath = partitionPath;
    populateKeysBySchema(TRIP_EXAMPLE_SCHEMA, currSize + i, kp);
    incrementNumExistingKeysBySchema(TRIP_EXAMPLE_SCHEMA);
  }
  return inserts;
}
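For reference, the core construction pattern this method relies on is a HoodieKey paired with a HoodieAvroPayload wrapping an Avro GenericRecord. A minimal sketch, assuming a GenericRecord named row that already matches the target schema (the key and partition values here are illustrative):

  // Hypothetical construction: wrap an existing GenericRecord in a HoodieAvroPayload
  // and pair it with a HoodieKey (record key + partition path).
  HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), "2020/01/01");
  HoodieRecord record = new HoodieAvroRecord<>(key, new HoodieAvroPayload(Option.of(row)));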
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
Class SpillableMapTestUtils, method upsertRecords:
public static List<String> upsertRecords(List<IndexedRecord> iRecords, Map<String, HoodieRecord<? extends HoodieRecordPayload>> records) {
  List<String> recordKeys = new ArrayList<>();
  iRecords.forEach(r -> {
    String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
    recordKeys.add(key);
    HoodieRecord record = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
    // The record must be unsealed before its location can be mutated, then re-sealed.
    record.unseal();
    record.setCurrentLocation(new HoodieRecordLocation("DUMMY_COMMIT_TIME", "DUMMY_FILE_ID"));
    record.seal();
    records.put(key, record);
  });
  return recordKeys;
}
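A hedged sketch of how this utility is typically driven, using SchemaTestUtil test records; a plain HashMap stands in here for the spillable map used by the real tests:

  // Generate IndexedRecords that already carry the hoodie metadata fields,
  // then upsert them into the target map and collect the record keys.
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<String, HoodieRecord<? extends HoodieRecordPayload>> records = new HashMap<>();
  List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
  assertEquals(100, recordKeys.size());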
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
Class TestParquetInLining, method getParquetHoodieRecords:
static List<GenericRecord> getParquetHoodieRecords() throws IOException {
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
  String commitTime = "001";
  List<HoodieRecord> hoodieRecords = dataGenerator.generateInsertsWithHoodieAvroPayload(commitTime, 10);
  List<GenericRecord> toReturn = new ArrayList<>();
  for (HoodieRecord record : hoodieRecords) {
    toReturn.add((GenericRecord) ((HoodieAvroRecord) record).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get());
  }
  return toReturn;
}
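Note that getInsertValue returns an Option, which is empty for delete payloads; a more defensive variant of the unwrap loop above (same classes, hypothetical variable names) guards on that:

  for (HoodieRecord record : hoodieRecords) {
    Option<IndexedRecord> value =
        ((HoodieAvroRecord) record).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
    if (value.isPresent()) {
      // Skip records whose payload resolves to a delete.
      toReturn.add((GenericRecord) value.get());
    }
  }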
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.
Class TestHoodieLogFormat, method testBasicAppendAndScanMultipleFiles:
@ParameterizedTest
@MethodSource("testArguments")
public void testBasicAppendAndScanMultipleFiles(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled, boolean readBlocksLazily)
    throws IOException, URISyntaxException, InterruptedException {
  Writer writer = HoodieLogFormat.newWriterBuilder()
      .onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withSizeThreshold(1024)
      .withFileId("test-fileid1")
      .overBaseCommit("100")
      .withFs(fs)
      .build();
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  Set<HoodieLogFile> logFiles = new HashSet<>();
  List<List<IndexedRecord>> allRecords = new ArrayList<>();
  // create 4 log files
  while (writer.getLogFile().getLogVersion() != 4) {
    logFiles.add(writer.getLogFile());
    List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<IndexedRecord> copyOfRecords1 = records1.stream()
        .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema))
        .collect(Collectors.toList());
    allRecords.add(copyOfRecords1);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header);
    writer.appendBlock(dataBlock);
  }
  writer.close();
  FileCreateUtils.createDeltaCommit(basePath, "100", fs);
  // scan all log blocks (across multiple log files)
  HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
      .withFileSystem(fs)
      .withBasePath(basePath)
      .withLogFilePaths(logFiles.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()))
      .withReaderSchema(schema)
      .withLatestInstantTime("100")
      .withMaxMemorySizeInBytes(10240L)
      .withReadBlocksLazily(readBlocksLazily)
      .withReverseReader(false)
      .withBufferSize(bufferSize)
      .withSpillableMapBasePath(BASE_OUTPUT_PATH)
      .withDiskMapType(diskMapType)
      .withBitCaskDiskMapCompressionEnabled(isCompressionEnabled)
      .build();
  List<IndexedRecord> scannedRecords = new ArrayList<>();
  for (HoodieRecord record : scanner) {
    scannedRecords.add((IndexedRecord) ((HoodieAvroRecord) record).getData().getInsertValue(schema).get());
  }
  assertEquals(scannedRecords.size(), allRecords.stream().mapToLong(Collection::size).sum(),
      "Scanner records count should be the same as appended records");
}
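The test is parameterized via @MethodSource("testArguments"). A hedged sketch of the kind of argument provider that signature expects; the exact combinations used by the project's test class are not shown here and the values below are illustrative:

  // Hypothetical argument provider: (diskMapType, isCompressionEnabled, readBlocksLazily).
  private static Stream<Arguments> testArguments() {
    return Stream.of(
        Arguments.of(ExternalSpillableMap.DiskMapType.BITCASK, false, true),
        Arguments.of(ExternalSpillableMap.DiskMapType.ROCKS_DB, true, false));
  }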