Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.
From class TestCellCodecWithTags, method testCellWithTag.
@Test
public void testCellWithTag() throws IOException {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  CountingOutputStream cos = new CountingOutputStream(baos);
  DataOutputStream dos = new DataOutputStream(cos);
  Codec codec = new CellCodecWithTags();
  Codec.Encoder encoder = codec.getEncoder(dos);
  // Encode three cells carrying two, one and three tags respectively.
  final Cell cell1 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("1"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("1"),
      new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring1")),
          new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring2")) });
  final Cell cell2 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("2"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("2"),
      new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring3")) });
  final Cell cell3 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("3"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("3"),
      new Tag[] { new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring4")),
          new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring5")),
          new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring6")) });
  encoder.write(cell1);
  encoder.write(cell2);
  encoder.write(cell3);
  encoder.flush();
  dos.close();
  long offset = cos.getCount();
  // Decode the stream back and verify that each cell and its tags round-trip intact.
  CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(baos.toByteArray()));
  DataInputStream dis = new DataInputStream(cis);
  Codec.Decoder decoder = codec.getDecoder(dis);
  assertTrue(decoder.advance());
  Cell c = decoder.current();
  assertTrue(CellUtil.equals(c, cell1));
  List<Tag> tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(2, tags.size());
  Tag tag = tags.get(0);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring1"), TagUtil.cloneValue(tag)));
  tag = tags.get(1);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring2"), TagUtil.cloneValue(tag)));
  assertTrue(decoder.advance());
  c = decoder.current();
  assertTrue(CellUtil.equals(c, cell2));
  tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(1, tags.size());
  tag = tags.get(0);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring3"), TagUtil.cloneValue(tag)));
  assertTrue(decoder.advance());
  c = decoder.current();
  assertTrue(CellUtil.equals(c, cell3));
  tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(3, tags.size());
  tag = tags.get(0);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring4"), TagUtil.cloneValue(tag)));
  tag = tags.get(1);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring5"), TagUtil.cloneValue(tag)));
  tag = tags.get(2);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring6"), TagUtil.cloneValue(tag)));
  assertFalse(decoder.advance());
  dis.close();
  // Everything that was written must have been consumed.
  assertEquals(offset, cis.getCount());
}
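The test above treats a tag as a one-byte type plus an opaque byte[] value. Below is a minimal sketch of just that surface, separate from any codec; the class name ArrayBackedTagSketch is ours, not HBase's, and it assumes hbase-common on the classpath.

import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagUtil;
import org.apache.hadoop.hbase.util.Bytes;

public class ArrayBackedTagSketch {
  public static void main(String[] args) {
    // A tag is a (type, value) pair backed by a single byte array.
    Tag tag = new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring1"));
    System.out.println("type  = " + tag.getType());
    System.out.println("value = " + Bytes.toString(TagUtil.cloneValue(tag)));
  }
}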
Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.
From class TestKeyValueCodecWithTags, method testKeyValueWithTag.
@Test
public void testKeyValueWithTag() throws IOException {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  CountingOutputStream cos = new CountingOutputStream(baos);
  DataOutputStream dos = new DataOutputStream(cos);
  Codec codec = new KeyValueCodecWithTags();
  Codec.Encoder encoder = codec.getEncoder(dos);
  final KeyValue kv1 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("1"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("1"),
      new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring1")),
          new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring2")) });
  final KeyValue kv2 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("2"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("2"),
      new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring3")) });
  final KeyValue kv3 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("3"),
      HConstants.LATEST_TIMESTAMP, Bytes.toBytes("3"),
      new Tag[] { new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring4")),
          new ArrayBackedTag((byte) 2, Bytes.toBytes("teststring5")),
          new ArrayBackedTag((byte) 1, Bytes.toBytes("teststring6")) });
  encoder.write(kv1);
  encoder.write(kv2);
  encoder.write(kv3);
  encoder.flush();
  dos.close();
  long offset = cos.getCount();
  CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(baos.toByteArray()));
  DataInputStream dis = new DataInputStream(cis);
  Codec.Decoder decoder = codec.getDecoder(dis);
  assertTrue(decoder.advance());
  Cell c = decoder.current();
  assertTrue(CellUtil.equals(c, kv1));
  List<Tag> tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(2, tags.size());
  Tag tag = tags.get(0);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring1"), TagUtil.cloneValue(tag)));
  tag = tags.get(1);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring2"), TagUtil.cloneValue(tag)));
  assertTrue(decoder.advance());
  c = decoder.current();
  assertTrue(CellUtil.equals(c, kv2));
  tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(1, tags.size());
  tag = tags.get(0);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring3"), TagUtil.cloneValue(tag)));
  assertTrue(decoder.advance());
  c = decoder.current();
  assertTrue(CellUtil.equals(c, kv3));
  tags = TagUtil.asList(c.getTagsArray(), c.getTagsOffset(), c.getTagsLength());
  assertEquals(3, tags.size());
  tag = tags.get(0);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring4"), TagUtil.cloneValue(tag)));
  tag = tags.get(1);
  assertEquals(2, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring5"), TagUtil.cloneValue(tag)));
  tag = tags.get(2);
  assertEquals(1, tag.getType());
  assertTrue(Bytes.equals(Bytes.toBytes("teststring6"), TagUtil.cloneValue(tag)));
  assertFalse(decoder.advance());
  dis.close();
  assertEquals(offset, cis.getCount());
}
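Both codec tests share the same encode/decode round trip; only the codec implementation differs. A stripped-down sketch of that round trip, under the same assumptions as the tests (the class name TagCodecRoundTrip is ours):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.util.Bytes;

public class TagCodecRoundTrip {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Codec codec = new KeyValueCodecWithTags();
    // Encode one tagged KeyValue into the in-memory stream.
    Codec.Encoder encoder = codec.getEncoder(new DataOutputStream(baos));
    encoder.write(new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("q"),
        HConstants.LATEST_TIMESTAMP, Bytes.toBytes("v"),
        new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("tagvalue")) }));
    encoder.flush();
    // Decode it back; the tags travel with the cell.
    Codec.Decoder decoder =
        codec.getDecoder(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
    while (decoder.advance()) {
      Cell c = decoder.current();
      System.out.println("decoded cell with " + c.getTagsLength() + " tag bytes");
    }
  }
}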
Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.
From class TextSortReducer, method reduce.
@Override
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines,
    Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context)
    throws java.io.IOException, InterruptedException {
  // although reduce() is called per-row, handle pathological case
  long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
  Iterator<Text> iter = lines.iterator();
  while (iter.hasNext()) {
    Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
    long curSize = 0;
    // stop at the end or the RAM threshold
    while (iter.hasNext() && curSize < threshold) {
      Text line = iter.next();
      byte[] lineBytes = line.getBytes();
      try {
        ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
        // Retrieve the timestamp if it exists
        ts = parsed.getTimestamp(ts);
        cellVisibilityExpr = parsed.getCellVisibility();
        ttl = parsed.getCellTTL();
        // create tags for the parsed line
        List<Tag> tags = new ArrayList<>();
        if (cellVisibilityExpr != null) {
          tags.addAll(kvCreator.getVisibilityExpressionResolver()
              .createVisibilityExpTags(cellVisibilityExpr));
        }
        // Add TTL directly to the KV so we can vary them when packing more than one KV
        // into puts
        if (ttl > 0) {
          tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
        }
        for (int i = 0; i < parsed.getColumnCount(); i++) {
          if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
              || i == parser.getAttributesKeyColumnIndex()
              || i == parser.getCellVisibilityColumnIndex()
              || i == parser.getCellTTLColumnIndex()) {
            continue;
          }
          // Creating the KV which needs to be directly written to HFiles. Using the Facade
          // KVCreator for creation of kvs.
          Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
              parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
              parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
              parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
          KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
          kvs.add(kv);
          curSize += kv.heapSize();
        }
      } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
          | InvalidLabelException badLine) {
        if (skipBadLines) {
          System.err.println("Bad line: " + badLine.getMessage());
          incrementBadLineCount(1);
          continue;
        }
        throw new IOException(badLine);
      }
    }
    context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass() + "("
        + StringUtils.humanReadableInt(curSize) + ")");
    int index = 0;
    for (KeyValue kv : kvs) {
      context.write(rowKey, kv);
      if (++index > 0 && index % 100 == 0)
        context.setStatus("Wrote " + index + " key values.");
    }
    // if we have more entries to process
    if (iter.hasNext()) {
      // force flush because we cannot guarantee intra-row sorted order
      context.write(null, null);
    }
  }
}
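The TTL branch above is the piece that actually uses ArrayBackedTag here: a cell-level TTL rides along as a tag of type TagType.TTL_TAG_TYPE whose value is the TTL serialized as a long. A hedged, isolated sketch of that pattern (the class, the helper method, and the one-day example value are ours; the ttlMillis unit is an assumption based on how cell TTLs are commonly expressed):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.util.Bytes;

public class TtlTagSketch {
  // Build the per-cell tag list the reducer attaches: an optional TTL tag.
  static List<Tag> ttlTags(long ttlMillis) {
    List<Tag> tags = new ArrayList<>();
    if (ttlMillis > 0) {
      // The TTL value is serialized as a long inside the tag.
      tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttlMillis)));
    }
    return tags;
  }

  public static void main(String[] args) {
    // Hypothetical one-day TTL.
    System.out.println(ttlTags(24L * 60 * 60 * 1000).size() + " tag(s) built");
  }
}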
Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.
From class TestCacheOnWrite, method testNotCachingDataBlocksDuringCompactionInternals.
private void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags)
    throws IOException, InterruptedException {
  // TODO: need to change this test if we add a cache size threshold for
  // compactions, or if we implement some other kind of intelligent logic for
  // deciding what blocks to cache-on-write on compaction.
  final String table = "CompactionCacheOnWrite";
  final String cf = "myCF";
  final byte[] cfBytes = Bytes.toBytes(cf);
  final int maxVersions = 3;
  Region region = TEST_UTIL.createTestRegion(table,
      new HColumnDescriptor(cf).setCompressionType(compress).setBloomFilterType(BLOOM_TYPE)
          .setMaxVersions(maxVersions)
          .setDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding()));
  int rowIdx = 0;
  long ts = EnvironmentEdgeManager.currentTime();
  for (int iFile = 0; iFile < 5; ++iFile) {
    for (int iRow = 0; iRow < 500; ++iRow) {
      String rowStr = "" + (rowIdx * rowIdx * rowIdx) + "row" + iFile + "_" + iRow;
      Put p = new Put(Bytes.toBytes(rowStr));
      ++rowIdx;
      for (int iCol = 0; iCol < 10; ++iCol) {
        String qualStr = "col" + iCol;
        String valueStr = "value_" + rowStr + "_" + qualStr;
        for (int iTS = 0; iTS < 5; ++iTS) {
          if (useTags) {
            Tag t = new ArrayBackedTag((byte) 1, "visibility");
            Tag[] tags = new Tag[1];
            tags[0] = t;
            KeyValue kv = new KeyValue(Bytes.toBytes(rowStr), cfBytes, Bytes.toBytes(qualStr),
                HConstants.LATEST_TIMESTAMP, Bytes.toBytes(valueStr), tags);
            p.add(kv);
          } else {
            p.addColumn(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr));
          }
        }
      }
      p.setDurability(Durability.ASYNC_WAL);
      region.put(p);
    }
    region.flush(true);
  }
  clearBlockCache(blockCache);
  assertEquals(0, blockCache.getBlockCount());
  region.compact(false);
  LOG.debug("compactStores() returned");
  // No data blocks should have been cached on write during the compaction.
  for (CachedBlock block : blockCache) {
    assertNotEquals(BlockType.ENCODED_DATA, block.getBlockType());
    assertNotEquals(BlockType.DATA, block.getBlockType());
  }
  ((HRegion) region).close();
}
Use of org.apache.hadoop.hbase.ArrayBackedTag in project hbase by apache.
From class TestEncodedSeekers, method doPuts.
private void doPuts(Region region) throws IOException {
  LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
  for (int i = 0; i < NUM_ROWS; ++i) {
    byte[] key = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();
    for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
      Put put = new Put(key);
      put.setDurability(Durability.ASYNC_WAL);
      byte[] col = Bytes.toBytes(String.valueOf(j));
      byte[] value = dataGenerator.generateRandomSizeValue(key, col);
      if (includeTags) {
        Tag[] tag = new Tag[1];
        tag[0] = new ArrayBackedTag((byte) 1, "Visibility");
        KeyValue kv = new KeyValue(key, CF_BYTES, col, HConstants.LATEST_TIMESTAMP, value, tag);
        put.add(kv);
      } else {
        put.addColumn(CF_BYTES, col, value);
      }
      if (VERBOSE) {
        KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value);
        System.err.println(Strings.padFront(i + "", ' ', 4) + " " + kvPut);
      }
      region.put(put);
    }
    if (i % NUM_ROWS_PER_FLUSH == 0) {
      region.flush(true);
    }
  }
}
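The two test loaders above follow the same pattern when tags are in play: rather than going through Put#addColumn, the tagged cell is built as a KeyValue (whose constructor accepts a Tag[]) and handed to Put#add. A minimal sketch of that pattern in isolation; the class name, helper method, and the placeholder tag type and value are ours:

import java.io.IOException;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TaggedPutSketch {
  static Put taggedPut(byte[] row, byte[] family, byte[] qualifier, byte[] value)
      throws IOException {
    Put put = new Put(row);
    // Tags ride on a fully built cell, so construct the KeyValue directly
    // and add it to the Put instead of using addColumn.
    KeyValue kv = new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, value,
        new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("visibility")) });
    put.add(kv);
    return put;
  }
}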