
Example 16 with ArrayBackedTag

Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.

Class TestHFileWriterV3, method writeDataAndReadFromHFile.

private void writeDataAndReadFromHFile(Path hfilePath, Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags) throws IOException {
    HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(useTags).withCompression(compressAlgo).build();
    HFile.Writer writer = new HFile.WriterFactory(conf, new CacheConfig(conf)).withPath(fs, hfilePath).withFileContext(context).withComparator(CellComparator.COMPARATOR).create();
    // Just a fixed seed.
    Random rand = new Random(9713312);
    List<KeyValue> keyValues = new ArrayList<>(entryCount);
    for (int i = 0; i < entryCount; ++i) {
        byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(rand, i);
        // A random-length random value.
        byte[] valueBytes = RandomKeyValueUtil.randomValue(rand);
        KeyValue keyValue = null;
        if (useTags) {
            ArrayList<Tag> tags = new ArrayList<>();
            for (int j = 0; j < 1 + rand.nextInt(4); j++) {
                byte[] tagBytes = new byte[16];
                rand.nextBytes(tagBytes);
                tags.add(new ArrayBackedTag((byte) 1, tagBytes));
            }
            keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes, tags);
        } else {
            keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes);
        }
        writer.append(keyValue);
        keyValues.add(keyValue);
    }
    // Add in an arbitrary order. They will be sorted lexicographically by
    // the key.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
    writer.close();
    FSDataInputStream fsdis = fs.open(hfilePath);
    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize);
    assertEquals(3, trailer.getMajorVersion());
    assertEquals(entryCount, trailer.getEntryCount());
    HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo).withIncludesMvcc(false).withIncludesTags(useTags).withHBaseCheckSum(true).build();
    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
    // Comparator class name is stored in the trailer in version 3.
    CellComparator comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(), fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
    if (findMidKey) {
        Cell midkey = dataBlockIndexReader.midkey();
        assertNotNull("Midkey should not be null", midkey);
    }
    // Meta index.
    metaBlockIndexReader.readRootIndex(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(), trailer.getMetaIndexCount());
    // File info
    FileInfo fileInfo = new FileInfo();
    fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] keyValueFormatVersion = fileInfo.get(HFileWriterImpl.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0;
    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;
    // Scan blocks the way the reader would scan them
    fsdis.seek(0);
    long curBlockPos = 0;
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
        HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false).unpack(context, blockReader);
        assertEquals(BlockType.DATA, block.getBlockType());
        ByteBuff buf = block.getBufferWithoutHeader();
        int keyLen = -1;
        while (buf.hasRemaining()) {
            keyLen = buf.getInt();
            int valueLen = buf.getInt();
            byte[] key = new byte[keyLen];
            buf.get(key);
            byte[] value = new byte[valueLen];
            buf.get(value);
            byte[] tagValue = null;
            if (useTags) {
                int tagLen = ((buf.get() & 0xff) << 8) ^ (buf.get() & 0xff);
                tagValue = new byte[tagLen];
                buf.get(tagValue);
            }
            if (includeMemstoreTS) {
                ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
                DataInputStream data_input = new DataInputStream(byte_input);
                memstoreTS = WritableUtils.readVLong(data_input);
                buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
            }
            // A brute-force check to see that all keys and values are correct.
            KeyValue kv = keyValues.get(entriesRead);
            assertTrue(Bytes.compareTo(key, kv.getKey()) == 0);
            assertTrue(Bytes.compareTo(value, 0, value.length, kv.getValueArray(), kv.getValueOffset(), kv.getValueLength()) == 0);
            if (useTags) {
                assertNotNull(tagValue);
                KeyValue tkv = kv;
                assertEquals(tagValue.length, tkv.getTagsLength());
                assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getTagsArray(), tkv.getTagsOffset(), tkv.getTagsLength()) == 0);
            }
            ++entriesRead;
        }
        ++blocksRead;
        curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead=" + blocksRead);
    assertEquals(entryCount, entriesRead);
    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.
    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
        LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " + trailer.getLoadOnOpenDataOffset());
        HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false).unpack(context, blockReader);
        assertEquals(BlockType.META, block.getBlockType());
        Text t = new Text();
        ByteBuff buf = block.getBufferWithoutHeader();
        if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
            throw new IOException("Failed to deserialize block " + this + " into a " + t.getClass().getSimpleName());
        }
        Text expectedText = (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text("Moscow") : new Text("Washington, D.C."));
        assertEquals(expectedText, t);
        LOG.info("Read meta block data: " + t);
        ++metaCounter;
        curBlockPos += block.getOnDiskSizeWithHeader();
    }
    fsdis.close();
}
Also used: KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag) Random(java.util.Random) FileInfo(org.apache.hadoop.hbase.io.hfile.HFile.FileInfo) CellComparator(org.apache.hadoop.hbase.CellComparator) ByteBuff(org.apache.hadoop.hbase.nio.ByteBuff) Cell(org.apache.hadoop.hbase.Cell) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)
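
A minimal sketch of the tag round-trip this test relies on, using only APIs that already appear above (the ArrayBackedTag constructor, the KeyValue constructor that takes a tag list, and the getTags* accessors). The class name and the row/value literals are placeholders, not part of the HBase test suite; the point is that the tags region of the KeyValue's backing array holds the serialized tags, which is what writeDataAndReadFromHFile later compares against the bytes it reads back from the data blocks.

import java.util.Arrays;
import java.util.Collections;

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TagRoundTripSketch {
    public static void main(String[] args) {
        // Tag type 1 with a 16-byte payload, mirroring the tags written in the test above.
        byte[] payload = new byte[16];
        Tag tag = new ArrayBackedTag((byte) 1, payload);
        KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"), Bytes.toBytes("q"),
            HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), Collections.singletonList(tag));
        // The tags region of the backing array is the serialized form of the tag list.
        byte[] tagsRegion = Arrays.copyOfRange(kv.getTagsArray(), kv.getTagsOffset(),
            kv.getTagsOffset() + kv.getTagsLength());
        System.out.println("serialized tags region: " + tagsRegion.length + " bytes");
    }
}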

Example 17 with ArrayBackedTag

Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.

Class TestStoreFileScannerWithTagCompression, method writeStoreFile.

private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    byte[] fam = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    long now = System.currentTimeMillis();
    byte[] b = Bytes.toBytes("k1");
    Tag t1 = new ArrayBackedTag((byte) 1, "tag1");
    Tag t2 = new ArrayBackedTag((byte) 2, "tag2");
    Tag t3 = new ArrayBackedTag((byte) 3, "tag3");
    try {
        writer.append(new KeyValue(b, fam, qualifier, now, b, new Tag[] { t1 }));
        b = Bytes.toBytes("k3");
        writer.append(new KeyValue(b, fam, qualifier, now, b, new Tag[] { t2, t1 }));
        b = Bytes.toBytes("k4");
        writer.append(new KeyValue(b, fam, qualifier, now, b, new Tag[] { t3 }));
        b = Bytes.toBytes("k5");
        writer.append(new KeyValue(b, fam, qualifier, now, b, new Tag[] { t3 }));
    } finally {
        writer.close();
    }
}
Also used: KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag)
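
The helper above attaches one or two tags per cell through the Tag[] variant of the KeyValue constructor; tag compression itself is enabled elsewhere in the test class and is not shown here. The sketch below is a rough illustration using only the constructors shown above: it compares the serialized tags length of a one-tag cell with that of a two-tag cell. The exact per-tag overhead (a short length prefix plus the type byte) is an assumption about the cell layout, so it is only printed, not asserted.

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TagArraySketch {
    public static void main(String[] args) {
        byte[] row = Bytes.toBytes("k1");
        byte[] fam = Bytes.toBytes("f");
        byte[] qual = Bytes.toBytes("q");
        long now = System.currentTimeMillis();

        Tag t1 = new ArrayBackedTag((byte) 1, "tag1");
        Tag t2 = new ArrayBackedTag((byte) 2, "tag2");

        KeyValue oneTag = new KeyValue(row, fam, qual, now, row, new Tag[] { t1 });
        KeyValue twoTags = new KeyValue(row, fam, qual, now, row, new Tag[] { t2, t1 });

        // Each tag is serialized with a small length prefix and its type byte in front of the
        // payload, so the two-tag cell's tags region is larger than the one-tag cell's.
        System.out.println("one tag:  " + oneTag.getTagsLength() + " bytes of tags");
        System.out.println("two tags: " + twoTags.getTagsLength() + " bytes of tags");
    }
}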

Example 18 with ArrayBackedTag

Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.

Class TestHMobStore, method init.

private void init(Configuration conf, HColumnDescriptor hcd) throws IOException {
    Path basedir = FSUtils.getRootDir(conf);
    fs = FileSystem.get(conf);
    Path homePath = new Path(basedir, Bytes.toString(family) + Path.SEPARATOR + Bytes.toString(family));
    fs.mkdirs(homePath);
    KeyValue key1 = new KeyValue(row, family, qf1, 1, value);
    KeyValue key2 = new KeyValue(row, family, qf2, 1, value);
    KeyValue key3 = new KeyValue(row2, family, qf3, 1, value2);
    KeyValue[] keys = new KeyValue[] { key1, key2, key3 };
    int maxKeyCount = keys.length;
    StoreFileWriter mobWriter = store.createWriterInTmp(currentDate, maxKeyCount, hcd.getCompactionCompressionType(), region.getRegionInfo().getStartKey(), false);
    mobFilePath = mobWriter.getPath();
    mobWriter.append(key1);
    mobWriter.append(key2);
    mobWriter.append(key3);
    mobWriter.close();
    String targetPathName = MobUtils.formatDate(currentDate);
    byte[] referenceValue = Bytes.toBytes(targetPathName + Path.SEPARATOR + mobFilePath.getName());
    Tag tableNameTag = new ArrayBackedTag(TagType.MOB_TABLE_NAME_TAG_TYPE, store.getTableName().getName());
    KeyValue kv1 = new KeyValue(row, family, qf1, Long.MAX_VALUE, referenceValue);
    KeyValue kv2 = new KeyValue(row, family, qf2, Long.MAX_VALUE, referenceValue);
    KeyValue kv3 = new KeyValue(row2, family, qf3, Long.MAX_VALUE, referenceValue);
    seekKey1 = MobUtils.createMobRefCell(kv1, referenceValue, tableNameTag);
    seekKey2 = MobUtils.createMobRefCell(kv2, referenceValue, tableNameTag);
    seekKey3 = MobUtils.createMobRefCell(kv3, referenceValue, tableNameTag);
}
Also used: Path(org.apache.hadoop.fs.Path) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag)
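
The init method tags each reference cell with the owning table's name so a MOB reference can later be resolved back to its table. Below is a distilled sketch of just that step, reusing only the calls that appear above (MobUtils.formatDate, MobUtils.createMobRefCell, the ArrayBackedTag constructor); buildMobRefCell and its parameters are illustrative names, not HBase API.

import java.util.Date;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.util.Bytes;

public class MobRefTagSketch {
    // Build a reference cell that points at a MOB file and carries the table-name tag.
    static Cell buildMobRefCell(byte[] row, byte[] family, byte[] qualifier,
            byte[] tableName, String mobFileName) {
        // The reference value is the MOB file name prefixed by its date partition.
        byte[] referenceValue =
            Bytes.toBytes(MobUtils.formatDate(new Date()) + Path.SEPARATOR + mobFileName);
        // The table-name tag lets readers resolve the reference back to the owning table.
        Tag tableNameTag = new ArrayBackedTag(TagType.MOB_TABLE_NAME_TAG_TYPE, tableName);
        KeyValue kv = new KeyValue(row, family, qualifier, Long.MAX_VALUE, referenceValue);
        return MobUtils.createMobRefCell(kv, referenceValue, tableNameTag);
    }
}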

Example 19 with ArrayBackedTag

Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.

Class TestHRegion, method testCellTTLs.

@Test
public void testCellTTLs() throws IOException {
    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge();
    EnvironmentEdgeManager.injectEdge(edge);
    final byte[] row = Bytes.toBytes("testRow");
    final byte[] q1 = Bytes.toBytes("q1");
    final byte[] q2 = Bytes.toBytes("q2");
    final byte[] q3 = Bytes.toBytes("q3");
    final byte[] q4 = Bytes.toBytes("q4");
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
    HColumnDescriptor hcd = new HColumnDescriptor(fam1);
    // 10 seconds
    hcd.setTimeToLive(10);
    htd.addFamily(hcd);
    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
    HRegion region = HBaseTestingUtility.createRegionAndWAL(new HRegionInfo(htd.getTableName(), HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY), TEST_UTIL.getDataTestDir(), conf, htd);
    assertNotNull(region);
    try {
        long now = EnvironmentEdgeManager.currentTime();
        // Add a cell that will expire in 5 seconds via cell TTL
        // TTL tags specify the TTL in milliseconds
        region.put(new Put(row).add(new KeyValue(row, fam1, q1, now, HConstants.EMPTY_BYTE_ARRAY,
            new ArrayBackedTag[] { new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(5000L)) })));
        // Add a cell that will expire after 10 seconds via family setting
        region.put(new Put(row).addColumn(fam1, q2, now, HConstants.EMPTY_BYTE_ARRAY));
        // Add a cell that will expire in 15 seconds via cell TTL
        // TTL tags specify the TTL in milliseconds
        region.put(new Put(row).add(new KeyValue(row, fam1, q3, now + 10000 - 1, HConstants.EMPTY_BYTE_ARRAY,
            new ArrayBackedTag[] { new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(5000L)) })));
        // Add a cell that will expire in 20 seconds via family setting
        region.put(new Put(row).addColumn(fam1, q4, now + 10000 - 1, HConstants.EMPTY_BYTE_ARRAY));
        // Flush so we are sure store scanning gets this right
        region.flush(true);
        // A query at time T+0 should return all cells
        Result r = region.get(new Get(row));
        assertNotNull(r.getValue(fam1, q1));
        assertNotNull(r.getValue(fam1, q2));
        assertNotNull(r.getValue(fam1, q3));
        assertNotNull(r.getValue(fam1, q4));
        // Increment time to T+5 seconds
        edge.incrementTime(5000);
        r = region.get(new Get(row));
        assertNull(r.getValue(fam1, q1));
        assertNotNull(r.getValue(fam1, q2));
        assertNotNull(r.getValue(fam1, q3));
        assertNotNull(r.getValue(fam1, q4));
        // Increment time to T+10 seconds
        edge.incrementTime(5000);
        r = region.get(new Get(row));
        assertNull(r.getValue(fam1, q1));
        assertNull(r.getValue(fam1, q2));
        assertNotNull(r.getValue(fam1, q3));
        assertNotNull(r.getValue(fam1, q4));
        // Increment time to T+15 seconds
        edge.incrementTime(5000);
        r = region.get(new Get(row));
        assertNull(r.getValue(fam1, q1));
        assertNull(r.getValue(fam1, q2));
        assertNull(r.getValue(fam1, q3));
        assertNotNull(r.getValue(fam1, q4));
        // Increment time to T+20 seconds
        edge.incrementTime(10000);
        r = region.get(new Get(row));
        assertNull(r.getValue(fam1, q1));
        assertNull(r.getValue(fam1, q2));
        assertNull(r.getValue(fam1, q3));
        assertNull(r.getValue(fam1, q4));
        // Fun with disappearing increments
        // Start at 1
        region.put(new Put(row).addColumn(fam1, q1, Bytes.toBytes(1L)));
        r = region.get(new Get(row));
        byte[] val = r.getValue(fam1, q1);
        assertNotNull(val);
        assertEquals(Bytes.toLong(val), 1L);
        // Increment with a TTL of 5 seconds
        Increment incr = new Increment(row).addColumn(fam1, q1, 1L);
        incr.setTTL(5000);
        // 2
        region.increment(incr);
        // New value should be 2
        r = region.get(new Get(row));
        val = r.getValue(fam1, q1);
        assertNotNull(val);
        assertEquals(Bytes.toLong(val), 2L);
        // Increment time to T+25 seconds
        edge.incrementTime(5000);
        // Value should be back to 1
        r = region.get(new Get(row));
        val = r.getValue(fam1, q1);
        assertNotNull(val);
        assertEquals(Bytes.toLong(val), 1L);
        // Increment time to T+30 seconds
        edge.incrementTime(5000);
        // Original value written at T+20 should be gone now via family TTL
        r = region.get(new Get(row));
        assertNull(r.getValue(fam1, q1));
    } finally {
        HBaseTestingUtility.closeRegionAndWAL(region);
    }
}
Also used: KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) IncrementingEnvironmentEdge(org.apache.hadoop.hbase.util.IncrementingEnvironmentEdge) Put(org.apache.hadoop.hbase.client.Put) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Result(org.apache.hadoop.hbase.client.Result) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Get(org.apache.hadoop.hbase.client.Get) Increment(org.apache.hadoop.hbase.client.Increment) Test(org.junit.Test)
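
The test exercises two ways of limiting the lifetime of an individual cell: a TTL tag attached directly to the KeyValue, and setTTL on the mutation (as with the Increment above). Below is a minimal sketch of both patterns, assuming the same classes the test uses; the 5000 ms value and the helper names are only illustrative.

import java.io.IOException;

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class CellTtlSketch {
    // Variant 1: the TTL travels with the cell itself as a tag (value in milliseconds).
    static Put putWithCellTtl(byte[] row, byte[] fam, byte[] qual, long now) throws IOException {
        KeyValue kv = new KeyValue(row, fam, qual, now, HConstants.EMPTY_BYTE_ARRAY,
            new ArrayBackedTag[] { new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(5000L)) });
        return new Put(row).add(kv);
    }

    // Variant 2: the TTL is set on the mutation; the server applies it to the cells it writes.
    static Increment incrementWithTtl(byte[] row, byte[] fam, byte[] qual) {
        Increment incr = new Increment(row).addColumn(fam, qual, 1L);
        incr.setTTL(5000);
        return incr;
    }
}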

Example 20 with ArrayBackedTag

Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.

Class TsvImporterMapper, method map.

/**
   * Convert a line of TSV text into an HBase table row.
   */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    try {
        ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
        // Retrieve timestamp if exists
        ts = parsed.getTimestamp(ts);
        cellVisibilityExpr = parsed.getCellVisibility();
        ttl = parsed.getCellTTL();
        // create tags for the parsed line
        if (hfileOutPath != null) {
            tags.clear();
            if (cellVisibilityExpr != null) {
                tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(cellVisibilityExpr));
            }
            // Add the TTL as a cell tag so it is carried into the generated HFiles
            if (ttl > 0) {
                tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
            }
        }
        Put put = new Put(rowKey.copyBytes());
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) {
                continue;
            }
            populatePut(lineBytes, parsed, put, i);
        }
        context.write(rowKey, put);
    } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
        if (logBadLines) {
            System.err.println(value);
        }
        System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
        if (skipBadLines) {
            incrementBadLineCount(1);
            return;
        }
        throw new IOException(badLine);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Also used: ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Put(org.apache.hadoop.hbase.client.Put) BadTsvLineException(org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException)
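
In the mapper, the tag list is only assembled when the job writes HFiles directly (hfileOutPath != null). The hypothetical helper below sketches just that tag-list assembly; buildCellTags and its parameters are illustrative names, not part of ImportTsv.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.util.Bytes;

public class TsvTagSketch {
    // Hypothetical helper: collect the per-cell tags for the bulk-load (HFile) output path.
    static List<Tag> buildCellTags(List<Tag> visibilityTags, long ttlMillis) {
        List<Tag> tags = new ArrayList<>();
        if (visibilityTags != null) {
            // In the mapper these come from the visibility expression resolver.
            tags.addAll(visibilityTags);
        }
        if (ttlMillis > 0) {
            // A positive cell TTL from the TSV line becomes a TTL tag on every cell of the row.
            tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttlMillis)));
        }
        return tags;
    }
}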

Aggregations

ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 37
KeyValue (org.apache.hadoop.hbase.KeyValue): 31
Tag (org.apache.hadoop.hbase.Tag): 31
ArrayList (java.util.ArrayList): 23
Cell (org.apache.hadoop.hbase.Cell): 14
Test (org.junit.Test): 9
Put (org.apache.hadoop.hbase.client.Put): 7
IOException (java.io.IOException): 6
ByteBufferKeyValue (org.apache.hadoop.hbase.ByteBufferKeyValue): 6
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 4
DataOutputStream (java.io.DataOutputStream): 4
Path (org.apache.hadoop.fs.Path): 4
ByteArrayInputStream (java.io.ByteArrayInputStream): 3
DataInputStream (java.io.DataInputStream): 3
ByteBuffer (java.nio.ByteBuffer): 3
HashMap (java.util.HashMap): 3
List (java.util.List): 3
Random (java.util.Random): 3
CountingInputStream (com.google.common.io.CountingInputStream): 2
CountingOutputStream (com.google.common.io.CountingOutputStream): 2