Example 1 with CellComparator

Use of org.apache.hadoop.hbase.CellComparator in project hbase by apache.

From the class TestHFileWriterV3, the method writeDataAndReadFromHFile:

private void writeDataAndReadFromHFile(Path hfilePath, Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags) throws IOException {
    HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(useTags).withDataBlockEncoding(DataBlockEncoding.NONE).withCompression(compressAlgo).build();
    CacheConfig cacheConfig = new CacheConfig(conf);
    HFile.Writer writer = new HFile.WriterFactory(conf, cacheConfig).withPath(fs, hfilePath).withFileContext(context).create();
    // Just a fixed seed.
    Random rand = new Random(9713312);
    List<KeyValue> keyValues = new ArrayList<>(entryCount);
    for (int i = 0; i < entryCount; ++i) {
        byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(rand, i);
        // A random-length random value.
        byte[] valueBytes = RandomKeyValueUtil.randomValue(rand);
        KeyValue keyValue = null;
        if (useTags) {
            ArrayList<Tag> tags = new ArrayList<>();
            for (int j = 0; j < 1 + rand.nextInt(4); j++) {
                byte[] tagBytes = new byte[16];
                rand.nextBytes(tagBytes);
                tags.add(new ArrayBackedTag((byte) 1, tagBytes));
            }
            keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes, tags);
        } else {
            keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes);
        }
        writer.append(keyValue);
        keyValues.add(keyValue);
    }
    // Add in an arbitrary order. They will be sorted lexicographically by
    // the key.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
    writer.close();
    FSDataInputStream fsdis = fs.open(hfilePath);
    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize);
    assertEquals(3, trailer.getMajorVersion());
    assertEquals(entryCount, trailer.getEntryCount());
    HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo).withIncludesMvcc(false).withIncludesTags(useTags).withDataBlockEncoding(DataBlockEncoding.NONE).withHBaseCheckSum(true).build();
    ReaderContext readerContext = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(fsdis)).withFilePath(hfilePath).withFileSystem(fs).withFileSize(fileSize).build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(readerContext, meta, ByteBuffAllocator.HEAP, conf);
    // Comparator class name is stored in the trailer in version 3.
    CellComparator comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(), fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fs, hfilePath);
    readerContext = new ReaderContextBuilder().withFilePath(hfilePath).withFileSize(fileSize).withFileSystem(wrapper.getHfs()).withInputStreamWrapper(wrapper).build();
    HFileInfo hfile = new HFileInfo(readerContext, conf);
    HFile.Reader reader = new HFilePreadReader(readerContext, hfile, cacheConfig, conf);
    hfile.initMetaAndIndex(reader);
    if (findMidKey) {
        Cell midkey = dataBlockIndexReader.midkey(reader);
        assertNotNull("Midkey should not be null", midkey);
    }
    // Meta index.
    metaBlockIndexReader.readRootIndex(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(), trailer.getMetaIndexCount());
    // File info
    HFileInfo fileInfo = new HFileInfo();
    fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] keyValueFormatVersion = fileInfo.get(HFileWriterImpl.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0;
    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;
    // Scan blocks the way the reader would scan them
    fsdis.seek(0);
    long curBlockPos = 0;
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
        HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true).unpack(context, blockReader);
        assertEquals(BlockType.DATA, block.getBlockType());
        ByteBuff buf = block.getBufferWithoutHeader();
        int keyLen = -1;
        while (buf.hasRemaining()) {
            keyLen = buf.getInt();
            int valueLen = buf.getInt();
            byte[] key = new byte[keyLen];
            buf.get(key);
            byte[] value = new byte[valueLen];
            buf.get(value);
            byte[] tagValue = null;
            if (useTags) {
                // The tags length is serialized as two bytes, big-endian.
                int tagLen = ((buf.get() & 0xff) << 8) ^ (buf.get() & 0xff);
                tagValue = new byte[tagLen];
                buf.get(tagValue);
            }
            if (includeMemstoreTS) {
                ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
                DataInputStream data_input = new DataInputStream(byte_input);
                memstoreTS = WritableUtils.readVLong(data_input);
                buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
            }
            // A brute-force check to see that all keys and values are correct.
            KeyValue kv = keyValues.get(entriesRead);
            assertTrue(Bytes.compareTo(key, kv.getKey()) == 0);
            assertTrue(Bytes.compareTo(value, 0, value.length, kv.getValueArray(), kv.getValueOffset(), kv.getValueLength()) == 0);
            if (useTags) {
                assertNotNull(tagValue);
                KeyValue tkv = kv;
                assertEquals(tagValue.length, tkv.getTagsLength());
                assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getTagsArray(), tkv.getTagsOffset(), tkv.getTagsLength()) == 0);
            }
            ++entriesRead;
        }
        ++blocksRead;
        curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead=" + blocksRead);
    assertEquals(entryCount, entriesRead);
    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.
    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
        LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " + trailer.getLoadOnOpenDataOffset());
        HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true).unpack(context, blockReader);
        assertEquals(BlockType.META, block.getBlockType());
        Text t = new Text();
        ByteBuff buf = block.getBufferWithoutHeader();
        if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
            throw new IOException("Failed to deserialize block " + this + " into a " + t.getClass().getSimpleName());
        }
        Text expectedText = (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text("Moscow") : new Text("Washington, D.C."));
        assertEquals(expectedText, t);
        LOG.info("Read meta block data: " + t);
        ++metaCounter;
        curBlockPos += block.getOnDiskSizeWithHeader();
    }
    fsdis.close();
    reader.close();
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Random(java.util.Random) CellComparator(org.apache.hadoop.hbase.CellComparator) ByteBuff(org.apache.hadoop.hbase.nio.ByteBuff) Cell(org.apache.hadoop.hbase.Cell) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) Tag(org.apache.hadoop.hbase.Tag)
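
A minimal follow-up sketch, not taken from the test above: the comparator recovered via trailer.createComparator() (for a file written with the default comparator, it should behave like the CellComparator.getInstance() singleton) orders cells by row key, which is the ordering the test's appended keys rely on. The row keys below are made up for illustration:

CellComparator comparator = CellComparator.getInstance();
KeyValue a = new KeyValue(Bytes.toBytes("row-a"), null, null);
KeyValue b = new KeyValue(Bytes.toBytes("row-b"), null, null);
// "row-a" sorts before "row-b", so the comparison is negative.
assertTrue(comparator.compare(a, b) < 0);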

Example 2 with CellComparator

Use of org.apache.hadoop.hbase.CellComparator in project hbase by apache.

From the class TestFilterList, the method testHintPassThru:

/**
 * Test pass-thru of hints.
 */
@Test
public void testHintPassThru() throws Exception {
    final KeyValue minKeyValue = new KeyValue(Bytes.toBytes(0L), null, null);
    final KeyValue maxKeyValue = new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
    Filter filterNoHint = new FilterBase() {

        @Override
        public byte[] toByteArray() {
            return null;
        }

        @Override
        public ReturnCode filterCell(final Cell ignored) throws IOException {
            return ReturnCode.INCLUDE;
        }
    };
    Filter filterMinHint = new FilterBase() {

        @Override
        public ReturnCode filterCell(final Cell ignored) {
            return ReturnCode.SEEK_NEXT_USING_HINT;
        }

        @Override
        public Cell getNextCellHint(Cell currentKV) {
            return minKeyValue;
        }

        @Override
        public byte[] toByteArray() {
            return null;
        }
    };
    Filter filterMaxHint = new FilterBase() {

        @Override
        public ReturnCode filterCell(final Cell ignored) {
            return ReturnCode.SEEK_NEXT_USING_HINT;
        }

        @Override
        public Cell getNextCellHint(Cell cell) {
            return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null);
        }

        @Override
        public byte[] toByteArray() {
            return null;
        }
    };
    CellComparator comparator = CellComparator.getInstance();
    // MUST PASS ONE
    // Should take the min if given two hints
    FilterList filterList = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(new Filter[] { filterMinHint, filterMaxHint }));
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), minKeyValue));
    // Should have no hint if any filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(new Filter[] { filterMinHint, filterMaxHint, filterNoHint }));
    assertNull(filterList.getNextCellHint(null));
    filterList = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(new Filter[] { filterNoHint, filterMaxHint }));
    assertNull(filterList.getNextCellHint(null));
    // Should give max hint if it's the only one
    filterList = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(new Filter[] { filterMaxHint, filterMaxHint }));
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), maxKeyValue));
    // MUST PASS ALL
    // Should take the first hint
    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(new Filter[] { filterMinHint, filterMaxHint }));
    filterList.filterCell(null);
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), minKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(new Filter[] { filterMaxHint, filterMinHint }));
    filterList.filterCell(null);
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), maxKeyValue));
    // Should have first hint even if a filter has no hint
    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(new Filter[] { filterNoHint, filterMinHint, filterMaxHint }));
    filterList.filterCell(null);
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), minKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(new Filter[] { filterNoHint, filterMaxHint }));
    filterList.filterCell(null);
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), maxKeyValue));
    filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(new Filter[] { filterNoHint, filterMinHint }));
    filterList.filterCell(null);
    assertEquals(0, comparator.compare(filterList.getNextCellHint(null), minKeyValue));
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) CellComparator(org.apache.hadoop.hbase.CellComparator) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
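
A hedged sketch of the hint-merging rule the MUST_PASS_ONE assertions above encode: a disjunction can only safely skip as far as the smallest hint, and it has no hint at all if any child filter lacks one. The helper minHint is hypothetical, not FilterList's actual implementation:

private static Cell minHint(CellComparator comparator, Cell hintA, Cell hintB) {
    if (hintA == null || hintB == null) {
        // No hint if any filter has no hint: seeking past a hintless
        // filter could drop cells that filter would have included.
        return null;
    }
    // Otherwise the list may only seek to the smaller of the two hints.
    return comparator.compare(hintA, hintB) <= 0 ? hintA : hintB;
}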

Example 3 with CellComparator

Use of org.apache.hadoop.hbase.CellComparator in project hbase by apache.

From the class MutableRegionInfo, the method containsRange:

/**
 * Returns true if the given inclusive range of rows is fully contained
 * by this region. For example, if the region foo,a,g spans rows "a"
 * (inclusive) through "g" (exclusive), then passing ["b","c"] or ["a","c"]
 * returns true, but passing ["b","z"] returns false.
 * @throws IllegalArgumentException if the range passed is invalid (i.e. end &lt; start)
 */
@Override
public boolean containsRange(byte[] rangeStartKey, byte[] rangeEndKey) {
    CellComparator cellComparator = CellComparatorImpl.getCellComparator(tableName);
    if (cellComparator.compareRows(rangeStartKey, rangeEndKey) > 0) {
        throw new IllegalArgumentException("Invalid range: " + Bytes.toStringBinary(rangeStartKey) + " > " + Bytes.toStringBinary(rangeEndKey));
    }
    boolean firstKeyInRange = cellComparator.compareRows(rangeStartKey, startKey) >= 0;
    boolean lastKeyInRange = cellComparator.compareRows(rangeEndKey, endKey) < 0 || Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY);
    return firstKeyInRange && lastKeyInRange;
}
Also used : CellComparator(org.apache.hadoop.hbase.CellComparator)
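
A usage sketch of the row comparisons behind containsRange, reusing the javadoc's own example of a region spanning rows "a" (inclusive) through "g" (exclusive); the table name and row keys are illustrative:

CellComparator cc = CellComparatorImpl.getCellComparator(TableName.valueOf("foo"));
byte[] startKey = Bytes.toBytes("a");
byte[] endKey = Bytes.toBytes("g");
// ["b","c"] is fully contained: "b" >= startKey and "c" < endKey.
assertTrue(cc.compareRows(Bytes.toBytes("b"), startKey) >= 0
    && cc.compareRows(Bytes.toBytes("c"), endKey) < 0);
// ["b","z"] is not: "z" does not sort strictly before the end key "g".
assertFalse(cc.compareRows(Bytes.toBytes("z"), endKey) < 0);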

Example 4 with CellComparator

Use of org.apache.hadoop.hbase.CellComparator in project hbase by apache.

From the class FixedFileTrailer, the method setComparatorClass:

public void setComparatorClass(Class<? extends CellComparator> klass) {
    // Is the comparator instantiable?
    try {
        // If null, it should be the Bytes.BYTES_RAWCOMPARATOR
        if (klass != null) {
            // Instantiation succeeding is itself the check; the instance is discarded.
            CellComparator comp = klass.getDeclaredConstructor().newInstance();
            // If the name wasn't one of the legacy names, maybe it's a legitimate
            // new kind of comparator.
            this.comparatorClassName = klass.getName();
        }
    } catch (Exception e) {
        throw new RuntimeException("Comparator class " + klass.getName() + " is not instantiable", e);
    }
}
Also used : CellComparator(org.apache.hadoop.hbase.CellComparator) MetaCellComparator(org.apache.hadoop.hbase.MetaCellComparator) IOException(java.io.IOException)
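
This write-side setter pairs with the read side in Examples 1 and 5: the stored class name is what trailer.createComparator() later reinstantiates reflectively. A hedged sketch of choosing the class to store, assuming the per-table selection from Example 3 (where hbase:meta gets MetaCellComparator and user tables the default); trailer and tableName are assumed to be in scope:

// Pick the comparator for the table this HFile belongs to.
CellComparator comparator = CellComparatorImpl.getCellComparator(tableName);
trailer.setComparatorClass(comparator.getClass());
// Later, on the read path (as in Examples 1 and 5):
CellComparator restored = trailer.createComparator();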

Example 5 with CellComparator

Use of org.apache.hadoop.hbase.CellComparator in project hbase by apache.

From the class TestHFileWriterV3WithDataEncoders, the method writeDataAndReadFromHFile:

private void writeDataAndReadFromHFile(Path hfilePath, Compression.Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags) throws IOException {
    HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(useTags).withDataBlockEncoding(dataBlockEncoding).withCellComparator(CellComparatorImpl.COMPARATOR).withCompression(compressAlgo).build();
    CacheConfig cacheConfig = new CacheConfig(conf);
    HFile.Writer writer = new HFile.WriterFactory(conf, cacheConfig).withPath(fs, hfilePath).withFileContext(context).create();
    // Just a fixed seed.
    Random rand = new Random(9713312);
    List<KeyValue> keyValues = new ArrayList<>(entryCount);
    writeKeyValues(entryCount, useTags, writer, rand, keyValues);
    FSDataInputStream fsdis = fs.open(hfilePath);
    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize);
    Assert.assertEquals(3, trailer.getMajorVersion());
    Assert.assertEquals(entryCount, trailer.getEntryCount());
    HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo).withIncludesMvcc(true).withIncludesTags(useTags).withDataBlockEncoding(dataBlockEncoding).withHBaseCheckSum(true).build();
    ReaderContext readerContext = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(fsdis)).withFilePath(hfilePath).withFileSystem(fs).withFileSize(fileSize).build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(readerContext, meta, ByteBuffAllocator.HEAP, conf);
    // Comparator class name is stored in the trailer in version 3.
    CellComparator comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(), fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
    FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fs, hfilePath);
    readerContext = new ReaderContextBuilder().withFilePath(hfilePath).withFileSize(fileSize).withFileSystem(wrapper.getHfs()).withInputStreamWrapper(wrapper).build();
    HFileInfo hfile = new HFileInfo(readerContext, conf);
    HFile.Reader reader = new HFilePreadReader(readerContext, hfile, cacheConfig, conf);
    hfile.initMetaAndIndex(reader);
    if (findMidKey) {
        Cell midkey = dataBlockIndexReader.midkey(reader);
        Assert.assertNotNull("Midkey should not be null", midkey);
    }
    // Meta index.
    metaBlockIndexReader.readRootIndex(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(), trailer.getMetaIndexCount());
    // File info
    HFileInfo fileInfo = new HFileInfo();
    fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] keyValueFormatVersion = fileInfo.get(HFileWriterImpl.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0;
    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;
    DataBlockEncoder encoder = dataBlockEncoding.getEncoder();
    long curBlockPos = scanBlocks(entryCount, context, keyValues, fsdis, trailer, meta, blockReader, entriesRead, blocksRead, encoder);
    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.
    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
        LOG.info("Current offset: {}, scanning until {}", fsdis.getPos(), trailer.getLoadOnOpenDataOffset());
        HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true).unpack(context, blockReader);
        Assert.assertEquals(BlockType.META, block.getBlockType());
        Text t = new Text();
        ByteBuff buf = block.getBufferWithoutHeader();
        if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
            throw new IOException("Failed to deserialize block " + this + " into a " + t.getClass().getSimpleName());
        }
        Text expectedText = (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text("Moscow") : new Text("Washington, D.C."));
        Assert.assertEquals(expectedText, t);
        LOG.info("Read meta block data: " + t);
        ++metaCounter;
        curBlockPos += block.getOnDiskSizeWithHeader();
    }
    fsdis.close();
    reader.close();
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Random(java.util.Random) CellComparator(org.apache.hadoop.hbase.CellComparator) ByteBuff(org.apache.hadoop.hbase.nio.ByteBuff) DataBlockEncoder(org.apache.hadoop.hbase.io.encoding.DataBlockEncoder) Cell(org.apache.hadoop.hbase.Cell) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
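
A closing sketch of the consistency assumption both HFile tests lean on: for a file written with the default comparator (as here, via withCellComparator(CellComparatorImpl.COMPARATOR)), the comparator reinstantiated from the trailer should order any pair of cells exactly like the CellComparator.getInstance() singleton. trailer, kvA, and kvB are placeholders:

CellComparator fromTrailer = trailer.createComparator();
CellComparator byDefault = CellComparator.getInstance();
// Both comparators should agree on the sign of every comparison.
Assert.assertEquals(
    Integer.signum(fromTrailer.compare(kvA, kvB)),
    Integer.signum(byDefault.compare(kvA, kvB)));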

Aggregations

CellComparator (org.apache.hadoop.hbase.CellComparator) 6
IOException (java.io.IOException) 3
Cell (org.apache.hadoop.hbase.Cell) 3
KeyValue (org.apache.hadoop.hbase.KeyValue) 3
ArrayList (java.util.ArrayList) 2
Random (java.util.Random) 2
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 2
FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) 2
ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff) 2
Text (org.apache.hadoop.io.Text) 2
Test (org.junit.Test) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
DataInputStream (java.io.DataInputStream) 1
Configuration (org.apache.hadoop.conf.Configuration) 1
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag) 1
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 1
MetaCellComparator (org.apache.hadoop.hbase.MetaCellComparator) 1
Tag (org.apache.hadoop.hbase.Tag) 1
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) 1
DataBlockEncoder (org.apache.hadoop.hbase.io.encoding.DataBlockEncoder) 1