Example 1 with HoodieHBaseKVComparator

Use of org.apache.hudi.io.storage.HoodieHBaseKVComparator in the Apache Hudi project.

From the class TestInLineFileSystemHFileInLining, method testSimpleInlineFileSystem:

@Test
public void testSimpleInlineFileSystem() throws IOException {
    Path outerInMemFSPath = getRandomOuterInMemPath();
    Path outerPath = new Path(FILE_SCHEME + outerInMemFSPath.toString().substring(outerInMemFSPath.toString().indexOf(':')));
    generatedPath = outerPath;
    CacheConfig cacheConf = new CacheConfig(inMemoryConf);
    FSDataOutputStream fout = createFSOutput(outerInMemFSPath, inMemoryConf);
    HFileContext meta = new HFileContextBuilder().withBlockSize(minBlockSize).build();
    HFile.Writer writer = HFile.getWriterFactory(inMemoryConf, cacheConf)
        .withOutputStream(fout)
        .withFileContext(meta)
        .withComparator(new HoodieHBaseKVComparator())
        .create();
    writeRecords(writer);
    fout.close();
    byte[] inlineBytes = getBytesToInline(outerInMemFSPath);
    long startOffset = generateOuterFile(outerPath, inlineBytes);
    long inlineLength = inlineBytes.length;
    // Generate phantom inline file
    Path inlinePath = getPhantomFile(outerPath, startOffset, inlineLength);
    InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf);
    FSDataInputStream fin = inlineFileSystem.open(inlinePath);
    HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, inlineConf);
    // Load the file info.
    reader.loadFileInfo();
    // Get a scanner that caches and that does not use pread.
    HFileScanner scanner = reader.getScanner(true, false);
    // Align scanner at start of the file.
    scanner.seekTo();
    readAllRecords(scanner);
    Set<Integer> rowIdsToSearch = getRandomValidRowIds(10);
    for (int rowId : rowIdsToSearch) {
        assertEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), "location lookup failed");
        // read the key and see if it matches
        ByteBuffer readKey = scanner.getKey();
        assertArrayEquals(getSomeKey(rowId), Bytes.toBytes(readKey), "seeked key does not match");
        // Re-seek and read the value twice; both reads should return identical bytes.
        scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
        ByteBuffer val1 = scanner.getValue();
        scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
        ByteBuffer val2 = scanner.getValue();
        assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2));
    }
    int[] invalidRowIds = { -4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000 };
    for (int rowId : invalidRowIds) {
        assertNotEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), "location lookup should have failed");
    }
    reader.close();
    fin.close();
    outerPath.getFileSystem(inMemoryConf).delete(outerPath, true);
}
Also used:
Path (org.apache.hadoop.fs.Path)
FileSystemTestUtils.getRandomOuterInMemPath (org.apache.hudi.common.testutils.FileSystemTestUtils.getRandomOuterInMemPath)
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner)
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder)
ByteBuffer (java.nio.ByteBuffer)
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext)
HoodieHBaseKVComparator (org.apache.hudi.io.storage.HoodieHBaseKVComparator)
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)
HFile (org.apache.hadoop.hbase.io.hfile.HFile)
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig)
Test (org.junit.jupiter.api.Test)
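
The phantom path above is produced by the test helper getPhantomFile. A rough sketch of what such a helper might look like, assuming Hudi's InLineFileSystem addresses an embedded byte range through an inlinefs URI carrying start_offset and length query parameters (the real helper lives in Hudi's test utilities, so treat this as illustrative):

static Path getPhantomFile(Path outerPath, long startOffset, long inlineLength) {
    // Assumed URI shape: inlinefs://<outer-path>/<outer-scheme>/?start_offset=..&length=..
    String outerScheme = outerPath.toUri().getScheme();
    String outerPathNoScheme = outerPath.toUri().getPath();
    return new Path("inlinefs://" + outerPathNoScheme + "/" + outerScheme
        + "/?start_offset=" + startOffset + "&length=" + inlineLength);
}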

Example 2 with HoodieHBaseKVComparator

Use of org.apache.hudi.io.storage.HoodieHBaseKVComparator in the Apache Hudi project.

From the class HoodieHFileDataBlock, method serializeRecords:

@Override
protected byte[] serializeRecords(List<IndexedRecord> records) throws IOException {
    HFileContext context = new HFileContextBuilder()
        .withBlockSize(DEFAULT_BLOCK_SIZE)
        .withCompression(compressionAlgorithm.get())
        .build();
    Configuration conf = new Configuration();
    CacheConfig cacheConfig = new CacheConfig(conf);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    FSDataOutputStream ostream = new FSDataOutputStream(baos, null);
    // Use simple incrementing counter as a key
    boolean useIntegerKey = !getRecordKey(records.get(0)).isPresent();
    // Compute the fixed key width once, outside the loop. Math.log is the natural
    // log, which is never smaller than log10, so the width always covers the
    // decimal digit count of the largest id.
    int keyWidth = useIntegerKey ? (int) Math.ceil(Math.log(records.size())) + 1 : -1;
    // Serialize records into bytes
    Map<String, byte[]> sortedRecordsMap = new TreeMap<>();
    Iterator<IndexedRecord> itr = records.iterator();
    int id = 0;
    while (itr.hasNext()) {
        IndexedRecord record = itr.next();
        String recordKey;
        if (useIntegerKey) {
            recordKey = String.format("%" + keyWidth + "s", id++);
        } else {
            recordKey = getRecordKey(record).get();
        }
        final byte[] recordBytes = serializeRecord(record);
        ValidationUtils.checkState(!sortedRecordsMap.containsKey(recordKey),
            "Writing multiple records with same key not supported for " + this.getClass().getName());
        sortedRecordsMap.put(recordKey, recordBytes);
    }
    HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig)
        .withOutputStream(ostream)
        .withFileContext(context)
        .withComparator(new HoodieHBaseKVComparator())
        .create();
    // Write the records
    sortedRecordsMap.forEach((recordKey, recordBytes) -> {
        try {
            KeyValue kv = new KeyValue(recordKey.getBytes(), null, null, recordBytes);
            writer.append(kv);
        } catch (IOException e) {
            throw new HoodieIOException("IOException serializing records", e);
        }
    });
    writer.close();
    ostream.flush();
    ostream.close();
    return baos.toByteArray();
}
Also used:
KeyValue (org.apache.hadoop.hbase.KeyValue)
Configuration (org.apache.hadoop.conf.Configuration)
IndexedRecord (org.apache.avro.generic.IndexedRecord)
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder)
ByteArrayOutputStream (java.io.ByteArrayOutputStream)
IOException (java.io.IOException)
TreeMap (java.util.TreeMap)
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext)
HoodieIOException (org.apache.hudi.exception.HoodieIOException)
HoodieHBaseKVComparator (org.apache.hudi.io.storage.HoodieHBaseKVComparator)
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)
HFile (org.apache.hadoop.hbase.io.hfile.HFile)
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig)
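
When records have no natural key, serializeRecords falls back to a fixed-width, space-padded counter. A standalone sketch (not Hudi code) of why that padding keeps the TreeMap in numeric order: with a constant keyWidth, lexicographic order of the padded strings matches the numeric order of the ids.

import java.util.Map;
import java.util.TreeMap;

public class PaddedKeyOrder {
    public static void main(String[] args) {
        int recordCount = 120;
        // Same width formula as serializeRecords above.
        int keyWidth = (int) Math.ceil(Math.log(recordCount)) + 1;
        Map<String, Integer> sorted = new TreeMap<>();
        for (int id : new int[] {7, 110, 9, 23, 0}) {
            sorted.put(String.format("%" + keyWidth + "s", id), id);
        }
        // Prints 0, 7, 9, 23, 110: ascending numeric order, because a space
        // sorts before any digit, so shorter numbers compare smaller.
        sorted.values().forEach(System.out::println);
    }
}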

Aggregations

FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 2
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 2
HFile (org.apache.hadoop.hbase.io.hfile.HFile): 2
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 2
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 2
HoodieHBaseKVComparator (org.apache.hudi.io.storage.HoodieHBaseKVComparator): 2
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
IOException (java.io.IOException): 1
ByteBuffer (java.nio.ByteBuffer): 1
TreeMap (java.util.TreeMap): 1
IndexedRecord (org.apache.avro.generic.IndexedRecord): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1
Path (org.apache.hadoop.fs.Path): 1
KeyValue (org.apache.hadoop.hbase.KeyValue): 1
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 1
FileSystemTestUtils.getRandomOuterInMemPath (org.apache.hudi.common.testutils.FileSystemTestUtils.getRandomOuterInMemPath): 1
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 1
Test (org.junit.jupiter.api.Test): 1
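
Both examples hand the comparator to the HFile writer factory through withComparator(new HoodieHBaseKVComparator()). A minimal sketch of exercising it directly, assuming it orders cells like HBase's default KeyValue comparator (its exact superclass depends on the HBase version Hudi is built against):

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hudi.io.storage.HoodieHBaseKVComparator;

public class ComparatorSketch {
    public static void main(String[] args) {
        // Key-only cells, mirroring how serializeRecords builds its KeyValues.
        KeyValue a = new KeyValue("key-a".getBytes(), null, null, new byte[0]);
        KeyValue b = new KeyValue("key-b".getBytes(), null, null, new byte[0]);
        // Prints a negative number: "key-a" sorts before "key-b".
        System.out.println(new HoodieHBaseKVComparator().compare(a, b));
    }
}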