Usage of org.apache.hudi.io.storage.HoodieHBaseKVComparator in the apache/hudi project.
From class TestInLineFileSystemHFileInLining, method testSimpleInlineFileSystem:
/**
 * End-to-end test of {@code InLineFileSystem} with an inlined HFile: writes an HFile into an
 * in-memory file system, embeds its bytes into an outer file, then reads it back through a
 * phantom inline path and verifies point lookups for valid and invalid row ids.
 *
 * <p>Improvement over the original: all streams and the HFile reader are managed with
 * try-with-resources, so they are released even when an assertion fails mid-test
 * (previously a failed assertion leaked {@code fout}/{@code fin}/{@code reader}).
 */
@Test
public void testSimpleInlineFileSystem() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  Path outerPath = new Path(FILE_SCHEME + outerInMemFSPath.toString().substring(outerInMemFSPath.toString().indexOf(':')));
  generatedPath = outerPath;
  CacheConfig cacheConf = new CacheConfig(inMemoryConf);
  // Write the HFile; the output stream must be closed before its bytes are read back below.
  // NOTE(review): writeRecords is assumed to close the writer (the original never closed it
  // explicitly) — confirm against the helper's implementation.
  try (FSDataOutputStream fout = createFSOutput(outerInMemFSPath, inMemoryConf)) {
    HFileContext meta = new HFileContextBuilder().withBlockSize(minBlockSize).build();
    HFile.Writer writer = HFile.getWriterFactory(inMemoryConf, cacheConf)
        .withOutputStream(fout)
        .withFileContext(meta)
        .withComparator(new HoodieHBaseKVComparator())
        .create();
    writeRecords(writer);
  }
  byte[] inlineBytes = getBytesToInline(outerInMemFSPath);
  long startOffset = generateOuterFile(outerPath, inlineBytes);
  long inlineLength = inlineBytes.length;
  // Generate phantom inline file
  Path inlinePath = getPhantomFile(outerPath, startOffset, inlineLength);
  InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf);
  // Declaration order matters: try-with-resources closes in reverse, i.e. reader before fin,
  // matching the original explicit close order.
  try (FSDataInputStream fin = inlineFileSystem.open(inlinePath);
       HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, inlineConf)) {
    // Load up the index.
    reader.loadFileInfo();
    // Get a scanner that caches and that does not use pread.
    HFileScanner scanner = reader.getScanner(true, false);
    // Align scanner at start of the file.
    scanner.seekTo();
    readAllRecords(scanner);
    Set<Integer> rowIdsToSearch = getRandomValidRowIds(10);
    for (int rowId : rowIdsToSearch) {
      // seekTo returning 0 means an exact key match was found.
      assertEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), "location lookup failed");
      // read the key and see if it matches
      ByteBuffer readKey = scanner.getKey();
      assertArrayEquals(getSomeKey(rowId), Bytes.toBytes(readKey), "seeked key does not match");
      // Seeking twice to the same key must yield identical values (scanner determinism).
      scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
      ByteBuffer val1 = scanner.getValue();
      scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
      ByteBuffer val2 = scanner.getValue();
      assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2));
    }
    // Row ids outside [0, maxRows) must not resolve to an exact match.
    int[] invalidRowIds = { -4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000 };
    for (int rowId : invalidRowIds) {
      assertNotEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), "location lookup should have failed");
    }
  }
  outerPath.getFileSystem(inMemoryConf).delete(outerPath, true);
}
Usage of org.apache.hudi.io.storage.HoodieHBaseKVComparator in the apache/hudi project.
From class HoodieHFileDataBlock, method serializeRecords:
/**
 * Serializes the given records into an in-memory HFile and returns its raw bytes.
 *
 * <p>Records are keyed either by their record key (when present) or by a zero-based,
 * space-padded incrementing counter, sorted lexicographically via a {@link TreeMap}
 * (HFile requires keys to be appended in sorted order), then written through an
 * {@code HFile.Writer} backed by a {@link ByteArrayOutputStream}.
 *
 * <p>Fix over the original: record keys are encoded with an explicit UTF-8 charset
 * instead of the platform-default charset, so the serialized bytes are identical
 * across JVMs/OSes.
 *
 * @param records records to serialize; must be non-empty and must not contain
 *                duplicate record keys
 * @return the serialized HFile content
 * @throws IOException if writing the HFile fails
 */
@Override
protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
  HFileContext context = new HFileContextBuilder().withBlockSize(DEFAULT_BLOCK_SIZE).withCompression(compressionAlgorithm.get()).build();
  Configuration conf = new Configuration();
  CacheConfig cacheConfig = new CacheConfig(conf);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  FSDataOutputStream ostream = new FSDataOutputStream(baos, null);
  // Use simple incrementing counter as a key
  boolean useIntegerKey = !getRecordKey(records.get(0)).isPresent();
  // This is set here to avoid re-computing this in the loop.
  // NOTE(review): Math.log is the natural log, so this over-estimates the decimal digit
  // count (log10 would be tight). Kept as-is to preserve the on-disk key padding format.
  int keyWidth = useIntegerKey ? (int) Math.ceil(Math.log(records.size())) + 1 : -1;
  // Serialize records into bytes, sorted by key as required by the HFile format.
  Map<String, byte[]> sortedRecordsMap = new TreeMap<>();
  int id = 0;
  for (IndexedRecord record : records) {
    String recordKey;
    if (useIntegerKey) {
      // Left-pad with spaces so lexicographic order matches numeric order.
      recordKey = String.format("%" + keyWidth + "s", id++);
    } else {
      recordKey = getRecordKey(record).get();
    }
    final byte[] recordBytes = serializeRecord(record);
    ValidationUtils.checkState(!sortedRecordsMap.containsKey(recordKey), "Writing multiple records with same key not supported for " + this.getClass().getName());
    sortedRecordsMap.put(recordKey, recordBytes);
  }
  HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig).withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create();
  // Write the records
  sortedRecordsMap.forEach((recordKey, recordBytes) -> {
    try {
      // Explicit UTF-8 keeps the serialized key bytes platform-independent
      // (the original used the platform-default charset).
      KeyValue kv = new KeyValue(recordKey.getBytes(java.nio.charset.StandardCharsets.UTF_8), null, null, recordBytes);
      writer.append(kv);
    } catch (IOException e) {
      throw new HoodieIOException("IOException serializing records", e);
    }
  });
  writer.close();
  ostream.flush();
  ostream.close();
  return baos.toByteArray();
}
Aggregations