Use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.
The class TestScannersWithFilters, method testFirstKeyOnlyFilter.
@Test
public void testFirstKeyOnlyFilter() throws Exception {
  Scan s = new Scan();
  s.setFilter(new FirstKeyOnlyFilter());
  // Expected KVs, the first KV from each of the remaining 6 rows
  KeyValue[] kvs = {
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1])
  };
  verifyScanFull(s, kvs);
}
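The verifyScanFull helper is referenced but not shown. As a rough illustration only, not the project's actual implementation, a full-scan verifier could compare each returned cell against the expected array along these lines, assuming the test class holds a Table field named ht and statically imports JUnit's assertTrue and assertEquals:

// Hypothetical sketch of a scan verifier; the real verifyScanFull in
// TestScannersWithFilters may differ. Assumes a Table field named "ht".
private void verifyScanFull(Scan s, KeyValue[] kvs) throws IOException {
  try (ResultScanner scanner = ht.getScanner(s)) {
    int idx = 0;
    for (Result result : scanner) {
      for (Cell cell : result.rawCells()) {
        assertTrue("More cells returned than expected", idx < kvs.length);
        // Compare row/family/qualifier/value against the expected KeyValue
        assertTrue(CellUtil.matchingRow(cell, kvs[idx]));
        assertTrue(CellUtil.matchingFamily(cell, kvs[idx]));
        assertTrue(CellUtil.matchingQualifier(cell, kvs[idx]));
        assertTrue(CellUtil.matchingValue(cell, kvs[idx]));
        idx++;
      }
    }
    assertEquals("Expected cell count", kvs.length, idx);
  }
}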
Use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.
The class TextSortReducer, method reduce.
@Override
protected void reduce(ImmutableBytesWritable rowKey, Iterable<Text> lines,
    Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context)
    throws IOException, InterruptedException {
  // although reduce() is called per-row, handle pathological case
  long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
  Iterator<Text> iter = lines.iterator();
  while (iter.hasNext()) {
    Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
    long curSize = 0;
    // stop at the end or the RAM threshold
    while (iter.hasNext() && curSize < threshold) {
      Text line = iter.next();
      byte[] lineBytes = line.getBytes();
      try {
        ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
        // Retrieve the timestamp if one exists
        ts = parsed.getTimestamp(ts);
        cellVisibilityExpr = parsed.getCellVisibility();
        ttl = parsed.getCellTTL();
        // create tags for the parsed line
        List<Tag> tags = new ArrayList<>();
        if (cellVisibilityExpr != null) {
          tags.addAll(kvCreator.getVisibilityExpressionResolver()
              .createVisibilityExpTags(cellVisibilityExpr));
        }
        // Add the TTL as a tag directly on the KV
        if (ttl > 0) {
          tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
        }
        for (int i = 0; i < parsed.getColumnCount(); i++) {
          if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()
              || i == parser.getAttributesKeyColumnIndex()
              || i == parser.getCellVisibilityColumnIndex()
              || i == parser.getCellTTLColumnIndex()) {
            continue;
          }
          // Creating the KV which needs to be directly written to HFiles. Using the Facade
          // KVCreator for creation of kvs.
          Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(),
              parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length,
              parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes,
              parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
          KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
          kvs.add(kv);
          curSize += kv.heapSize();
        }
      } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException
          | InvalidLabelException badLine) {
        if (skipBadLines) {
          System.err.println("Bad line: " + badLine.getMessage());
          incrementBadLineCount(1);
          continue;
        }
        throw new IOException(badLine);
      }
    }
    context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass() + "("
        + StringUtils.humanReadableInt(curSize) + ")");
    int index = 0;
    for (KeyValue kv : kvs) {
      context.write(rowKey, kv);
      if (++index > 0 && index % 100 == 0)
        context.setStatus("Wrote " + index + " key values.");
    }
    // if we have more entries to process
    if (iter.hasNext()) {
      // force flush because we cannot guarantee intra-row sorted order
      context.write(null, null);
    }
  }
}
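Note how the TreeSet ordered by CellComparator.COMPARATOR is what guarantees the KeyValues for a row come out in the sort order HFile writers require, regardless of the order the input lines arrive in. A minimal, self-contained sketch of that ordering behavior, assuming hbase-common and hbase-client of the same era on the classpath:

import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class KvOrderingDemo {
  public static void main(String[] args) {
    // Cells inserted out of qualifier order...
    Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
    byte[] row = Bytes.toBytes("row1");
    byte[] fam = Bytes.toBytes("f");
    kvs.add(new KeyValue(row, fam, Bytes.toBytes("q2"), Bytes.toBytes("v2")));
    kvs.add(new KeyValue(row, fam, Bytes.toBytes("q1"), Bytes.toBytes("v1")));
    // ...iterate back out in KeyValue sort order (q1 before q2), which is
    // the order the reducer must emit them for HFile output.
    for (KeyValue kv : kvs) {
      System.out.println(kv);
    }
  }
}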
Use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.
The class PutCombiner, method reduce.
@Override
protected void reduce(K row, Iterable<Put> vals, Context context)
    throws IOException, InterruptedException {
  // Using HeapSize to create an upper bound on the memory size of
  // the puts and flush some portion of the content while looping. This
  // flush could result in multiple Puts for a single rowkey. That is
  // acceptable because Combiner is run as an optimization and it's not
  // critical that all Puts are grouped perfectly.
  long threshold = context.getConfiguration().getLong("putcombiner.row.threshold", 1L * (1 << 30));
  int cnt = 0;
  long curSize = 0;
  Put put = null;
  Map<byte[], List<Cell>> familyMap = null;
  for (Put p : vals) {
    cnt++;
    if (put == null) {
      put = p;
      familyMap = put.getFamilyCellMap();
    } else {
      for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap().entrySet()) {
        List<Cell> cells = familyMap.get(entry.getKey());
        List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
        for (Cell cell : entry.getValue()) {
          KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
          curSize += kv.heapSize();
          if (kvs != null) {
            kvs.add(kv);
          }
        }
        if (cells == null) {
          familyMap.put(entry.getKey(), entry.getValue());
        }
      }
      if (cnt % 10 == 0)
        context.setStatus("Combine " + cnt);
      if (curSize > threshold) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
        }
        context.write(row, put);
        put = null;
        curSize = 0;
        cnt = 0;
      }
    }
  }
  if (put != null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
    }
    context.write(row, put);
  }
}
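For context, a combiner like this plugs into a job through the standard MapReduce hook. A minimal setup sketch, assuming a job whose map output types are ImmutableBytesWritable and Put; the job name, class name, and threshold value here are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.PutCombiner;
import org.apache.hadoop.mapreduce.Job;

public class PutCombinerSetup {
  public static Job createJob() throws java.io.IOException {
    Configuration conf = HBaseConfiguration.create();
    // Optional: lower the flush threshold from the 1 GiB default read above
    conf.setLong("putcombiner.row.threshold", 256L * 1024 * 1024);
    Job job = Job.getInstance(conf, "put-combine-example");
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    // PutCombiner is generic in the key type; the raw class reference is fine here
    job.setCombinerClass(PutCombiner.class);
    return job;
  }
}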
Use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.
The class DefaultMemStore, method main.
/**
 * Code to help figure if our approximation of object heap sizes is close
 * enough. See hbase-900. Fills memstores then waits so the user can take a
 * heap dump and bring up the resultant hprof in something like jprofiler,
 * which allows you to get 'deep size' on objects.
 * @param args main args
 */
public static void main(String[] args) {
  RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
  LOG.info("vmName=" + runtime.getVmName() + ", vmVendor=" + runtime.getVmVendor()
      + ", vmVersion=" + runtime.getVmVersion());
  LOG.info("vmInputArguments=" + runtime.getInputArguments());
  DefaultMemStore memstore1 = new DefaultMemStore();
  // TODO: x32 vs x64
  final int count = 10000;
  byte[] fam = Bytes.toBytes("col");
  byte[] qf = Bytes.toBytes("umn");
  byte[] empty = new byte[0];
  MemstoreSize memstoreSize = new MemstoreSize();
  for (int i = 0; i < count; i++) {
    // Give each its own ts
    memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
  }
  LOG.info("memstore1 estimated size="
      + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
  for (int i = 0; i < count; i++) {
    memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
  }
  LOG.info("memstore1 estimated size (2nd loading of same data)="
      + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
  // Make a variably sized memstore.
  DefaultMemStore memstore2 = new DefaultMemStore();
  memstoreSize = new MemstoreSize();
  for (int i = 0; i < count; i++) {
    memstore2.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, new byte[i]), memstoreSize);
  }
  LOG.info("memstore2 estimated size="
      + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
  final int seconds = 30;
  LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
  LOG.info("Exiting.");
}
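The dump itself is taken from outside the process (for example with the JDK's jmap tool) or programmatically. A minimal programmatic sketch using the JDK's HotSpot diagnostic MXBean; the class and file names here are illustrative, not part of HBase:

import java.lang.management.ManagementFactory;
import com.sun.management.HotSpotDiagnosticMXBean;

public class HeapDumper {
  /** Writes an hprof snapshot that tools like jprofiler can open. */
  public static void dump(String path, boolean liveObjectsOnly) throws java.io.IOException {
    HotSpotDiagnosticMXBean bean =
        ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class);
    bean.dumpHeap(path, liveObjectsOnly);
  }

  public static void main(String[] args) throws java.io.IOException {
    // e.g. call this while the memstores above are still populated
    dump("memstore.hprof", true);
  }
}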
Use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.
The class HMobStore, method resolve.
/**
 * Reads the cell from the mob file.
 * @param reference The cell found in HBase; its value is a path to a mob file.
 * @param cacheBlocks Whether the scanner should cache blocks.
 * @param readPt The read point.
 * @param readEmptyValueOnMobCellMiss Whether to return an empty value when the mob file is
 *          missing or corrupt.
 * @return The cell found in the mob file.
 * @throws IOException
 */
public Cell resolve(Cell reference, boolean cacheBlocks, long readPt,
    boolean readEmptyValueOnMobCellMiss) throws IOException {
  Cell result = null;
  if (MobUtils.hasValidMobRefCellValue(reference)) {
    String fileName = MobUtils.getMobFileName(reference);
    Tag tableNameTag = MobUtils.getTableNameTag(reference);
    if (tableNameTag != null) {
      String tableNameString = TagUtil.getValueAsString(tableNameTag);
      List<Path> locations = map.get(tableNameString);
      if (locations == null) {
        IdLock.Entry lockEntry = keyLock.getLockEntry(tableNameString.hashCode());
        try {
          // re-check under the lock before populating the cache
          locations = map.get(tableNameString);
          if (locations == null) {
            locations = new ArrayList<>(2);
            TableName tn = TableName.valueOf(tableNameString);
            locations.add(MobUtils.getMobFamilyPath(conf, tn, family.getNameAsString()));
            locations.add(HFileArchiveUtil.getStoreArchivePath(conf, tn,
                MobUtils.getMobRegionInfo(tn).getEncodedName(), family.getNameAsString()));
            map.put(tableNameString, locations);
          }
        } finally {
          keyLock.releaseLockEntry(lockEntry);
        }
      }
      result = readCell(locations, fileName, reference, cacheBlocks, readPt,
          readEmptyValueOnMobCellMiss);
    }
  }
  if (result == null) {
    LOG.warn("The KeyValue result is null, assemble a new KeyValue with the same row, family, "
        + "qualifier, timestamp, type and tags but with an empty value to return.");
    result = new KeyValue(reference.getRowArray(), reference.getRowOffset(),
        reference.getRowLength(), reference.getFamilyArray(), reference.getFamilyOffset(),
        reference.getFamilyLength(), reference.getQualifierArray(),
        reference.getQualifierOffset(), reference.getQualifierLength(),
        reference.getTimestamp(), Type.codeToType(reference.getTypeByte()),
        HConstants.EMPTY_BYTE_ARRAY, 0, 0, reference.getTagsArray(), reference.getTagsOffset(),
        reference.getTagsLength());
  }
  return result;
}
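The locations cache above follows a check / lock / re-check pattern: look up the map, and only if the entry is missing take a per-key lock and look again before computing, so concurrent readers of the same table compute the path list at most once. A stripped-down sketch of the same pattern with hypothetical names, using a ConcurrentHashMap and simple lock striping in place of HBase's IdLock:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** Hypothetical illustration of the check/lock/re-check caching used in resolve(). */
public class MobPathCache {
  private final Map<String, List<String>> cache = new ConcurrentHashMap<>();
  private final Object[] locks = new Object[64];

  public MobPathCache() {
    for (int i = 0; i < locks.length; i++) {
      locks[i] = new Object();
    }
  }

  public List<String> getLocations(String tableName) {
    List<String> locations = cache.get(tableName);
    if (locations == null) {
      // Stripe locks by key hash, analogous to keyLock.getLockEntry(tableNameString.hashCode())
      synchronized (locks[(tableName.hashCode() & Integer.MAX_VALUE) % locks.length]) {
        // Re-check: another thread may have filled the entry while we waited for the lock
        locations = cache.get(tableName);
        if (locations == null) {
          locations = computeLocations(tableName);
          cache.put(tableName, locations);
        }
      }
    }
    return locations;
  }

  // Stand-in for the expensive two-path computation (mob family path + archive path)
  private List<String> computeLocations(String tableName) {
    return Arrays.asList("/hbase/mobdir/data/default/" + tableName);
  }
}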