Search in sources :

Example 36 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class TestScannersWithFilters method testFirstKeyOnlyFilter.

public void testFirstKeyOnlyFilter() throws Exception {
    Scan s = new Scan();
    s.setFilter(new FirstKeyOnlyFilter());
    // Expected KVs, the first KV from each of the remaining 6 rows
    KeyValue[] kvs = { new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]), new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]), new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]), new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]), new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]), new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]) };
    verifyScanFull(s, kvs);
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)

Example 37 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class TextSortReducer method reduce.

/**
 * Turns the text lines delivered for {@code rowKey} into KeyValues and writes them to
 * {@code context} in batches.
 *
 * <p>Lines are consumed until the iterator is exhausted or the running total of
 * {@code KeyValue.heapSize()} reaches the value configured under
 * {@code reducer.row.threshold} (default {@code 1L * (1 << 30)}). Each batch uses a
 * {@code TreeSet} built with {@code CellComparator.COMPARATOR}; its entries are written one by
 * one, and {@code context.write(null, null)} follows when more lines remain.
 *
 * <p>NOTE(review): this listing looks truncated. The {@code throws} clause has no type before
 * the comma (an {@code IOException} is thrown below), several blocks are never closed and some
 * statements are incomplete. Confirm against the upstream source before relying on it.
 *
 * @param rowKey key passed unchanged as the output key of every written KeyValue
 * @param lines text lines to parse for this key
 * @param context supplies the configuration and receives status updates and output
 * @throws InterruptedException declared in the signature; no visible statement throws it directly
 */
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> context) throws, InterruptedException {
    // although reduce() is called per-row, handle pathological case
    long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
    Iterator<Text> iter = lines.iterator();
    // Outer loop: one pass per batch, each batch starting with an empty set and a zero size.
    while (iter.hasNext()) {
        Set<KeyValue> kvs = new TreeSet<>(CellComparator.COMPARATOR);
        long curSize = 0;
        // stop at the end or the RAM threshold
        while (iter.hasNext() && curSize < threshold) {
            // NOTE(review): the right-hand side is missing; presumably the next element of
            // iter is meant. Confirm.
            Text line =;
            byte[] lineBytes = line.getBytes();
            try {
                // Only the first line.getLength() bytes of the buffer are handed to the parser.
                ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
                // Retrieve timestamp if exists
                // ts, cellVisibilityExpr and ttl are not declared in this method; presumably they
                // are fields of the enclosing class. Confirm.
                ts = parsed.getTimestamp(ts);
                cellVisibilityExpr = parsed.getCellVisibility();
                ttl = parsed.getCellTTL();
                // create tags for the parsed line
                List<Tag> tags = new ArrayList<>();
                // NOTE(review): no body is visible for this condition; whatever is done with the
                // visibility expression is missing from the listing.
                if (cellVisibilityExpr != null) {
                // NOTE(review): the comment below reads like the tail of a longer comment.
                // into puts
                // A positive TTL is attached to the cell as a TTL tag.
                if (ttl > 0) {
                    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
                for (int i = 0; i < parsed.getColumnCount(); i++) {
                    // Matches the row-key, timestamp, attributes, cell-visibility and cell-TTL
                    // columns. NOTE(review): no body is visible; presumably these columns are
                    // skipped. Confirm.
                    if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) {
                    // Creating the KV which needs to be directly written to HFiles. Using the Facade
                    // KVCreator for creation of kvs.
                    // Row key and value are passed as offset/length slices of lineBytes; family and
                    // qualifier come from the parser for column i.
                    Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
                    KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                    // NOTE(review): nothing visible adds kv to kvs, yet kvs is sized and iterated
                    // below; an add looks to be missing. Confirm.
                    curSize += kv.heapSize();
            } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
                // With skipBadLines set the problem is reported on stderr; as listed, control
                // still reaches the throw. NOTE(review): the end of this branch looks truncated.
                if (skipBadLines) {
                    System.err.println("Bad line." + badLine.getMessage());
                throw new IOException(badLine);
        context.setStatus("Read " + kvs.size() + " entries of " + kvs.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")");
        int index = 0;
        for (KeyValue kv : kvs) {
            context.write(rowKey, kv);
            // Refresh the status after every 100th written entry.
            if (++index > 0 && index % 100 == 0)
                context.setStatus("Wrote " + index + " key values.");
        // if we have more entries to process
        if (iter.hasNext()) {
            // force flush because we cannot guarantee intra-row sorted order
            context.write(null, null);
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException) TreeSet(java.util.TreeSet) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)

Example 38 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class PutCombiner method reduce.

protected void reduce(K row, Iterable<Put> vals, Context context) throws IOException, InterruptedException {
    // Using HeapSize to create an upper bound on the memory size of
    // the puts and flush some portion of the content while looping. This
    // flush could result in multiple Puts for a single rowkey. That is
    // acceptable because Combiner is run as an optimization and it's not
    // critical that all Puts are grouped perfectly.
    long threshold = context.getConfiguration().getLong("putcombiner.row.threshold", 1L * (1 << 30));
    int cnt = 0;
    long curSize = 0;
    Put put = null;
    Map<byte[], List<Cell>> familyMap = null;
    for (Put p : vals) {
        if (put == null) {
            put = p;
            familyMap = put.getFamilyCellMap();
        } else {
            for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap().entrySet()) {
                List<Cell> cells = familyMap.get(entry.getKey());
                List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null;
                for (Cell cell : entry.getValue()) {
                    KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                    curSize += kv.heapSize();
                    if (kvs != null) {
                if (cells == null) {
                    familyMap.put(entry.getKey(), entry.getValue());
            if (cnt % 10 == 0)
                context.setStatus("Combine " + cnt);
            if (curSize > threshold) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
                context.write(row, put);
                put = null;
                curSize = 0;
                cnt = 0;
    if (put != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Combined %d Put(s) into %d.", cnt, 1));
        context.write(row, put);
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) List(java.util.List) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put)

Example 39 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class DefaultMemStore method main.

/**
 * Code to help figure if our approximation of object heap sizes is close
 * enough.  See hbase-900.  Fills memstores then waits so user can heap
 * dump and bring up resultant hprof in something like jprofiler which
 * allows you to get 'deep size' on objects.
 * @param args main args
 */
public static void main(String[] args) {
    RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();"vmName=" + runtime.getVmName() + ", vmVendor=" + runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());"vmInputArguments=" + runtime.getInputArguments());
    DefaultMemStore memstore1 = new DefaultMemStore();
    // TODO: x32 vs x64
    final int count = 10000;
    byte[] fam = Bytes.toBytes("col");
    byte[] qf = Bytes.toBytes("umn");
    byte[] empty = new byte[0];
    MemstoreSize memstoreSize = new MemstoreSize();
    for (int i = 0; i < count; i++) {
        // Give each its own ts
        memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
    }"memstore1 estimated size=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    for (int i = 0; i < count; i++) {
        memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
    }"memstore1 estimated size (2nd loading of same data)=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    // Make a variably sized memstore.
    DefaultMemStore memstore2 = new DefaultMemStore();
    memstoreSize = new MemstoreSize();
    for (int i = 0; i < count; i++) {
        memstore2.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, new byte[i]), memstoreSize);
    }"memstore2 estimated size=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    final int seconds = 30;"Waiting " + seconds + " seconds while heap dump is taken");"Exiting.");
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) RuntimeMXBean(java.lang.management.RuntimeMXBean)

Example 40 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class HMobStore method resolve.

/**
 * Reads the cell from the mob file.
 * @param reference The cell found in HBase; its value is a path to a mob file.
 * @param cacheBlocks Whether the scanner should cache blocks.
 * @param readPt the read point.
 * @param readEmptyValueOnMobCellMiss Whether to return a null value when the mob file is
 *        missing or corrupt.
 * @return The cell found in the mob file.
 * @throws IOException
 */
public Cell resolve(Cell reference, boolean cacheBlocks, long readPt, boolean readEmptyValueOnMobCellMiss) throws IOException {
    Cell result = null;
    if (MobUtils.hasValidMobRefCellValue(reference)) {
        String fileName = MobUtils.getMobFileName(reference);
        Tag tableNameTag = MobUtils.getTableNameTag(reference);
        if (tableNameTag != null) {
            String tableNameString = TagUtil.getValueAsString(tableNameTag);
            List<Path> locations = map.get(tableNameString);
            if (locations == null) {
                IdLock.Entry lockEntry = keyLock.getLockEntry(tableNameString.hashCode());
                try {
                    locations = map.get(tableNameString);
                    if (locations == null) {
                        locations = new ArrayList<>(2);
                        TableName tn = TableName.valueOf(tableNameString);
                        locations.add(MobUtils.getMobFamilyPath(conf, tn, family.getNameAsString()));
                        locations.add(HFileArchiveUtil.getStoreArchivePath(conf, tn, MobUtils.getMobRegionInfo(tn).getEncodedName(), family.getNameAsString()));
                        map.put(tableNameString, locations);
                } finally {
            result = readCell(locations, fileName, reference, cacheBlocks, readPt, readEmptyValueOnMobCellMiss);
    if (result == null) {
        LOG.warn("The KeyValue result is null, assemble a new KeyValue with the same row,family," + "qualifier,timestamp,type and tags but with an empty value to return.");
        result = new KeyValue(reference.getRowArray(), reference.getRowOffset(), reference.getRowLength(), reference.getFamilyArray(), reference.getFamilyOffset(), reference.getFamilyLength(), reference.getQualifierArray(), reference.getQualifierOffset(), reference.getQualifierLength(), reference.getTimestamp(), Type.codeToType(reference.getTypeByte()), HConstants.EMPTY_BYTE_ARRAY, 0, 0, reference.getTagsArray(), reference.getTagsOffset(), reference.getTagsLength());
    return result;
Also used : Path(org.apache.hadoop.fs.Path) IdLock(org.apache.hadoop.hbase.util.IdLock) TableName(org.apache.hadoop.hbase.TableName) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)


KeyValue (org.apache.hadoop.hbase.KeyValue)552 Test (org.junit.Test)289 Cell (org.apache.hadoop.hbase.Cell)193 ArrayList (java.util.ArrayList)172 Put (org.apache.hadoop.hbase.client.Put)98 Scan (org.apache.hadoop.hbase.client.Scan)85 Result (org.apache.hadoop.hbase.client.Result)70 Configuration (org.apache.hadoop.conf.Configuration)64 Path (org.apache.hadoop.fs.Path)55 ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag)36 Tag (org.apache.hadoop.hbase.Tag)35 ByteBuffer (java.nio.ByteBuffer)34 List (java.util.List)34 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)34 IOException (java.io.IOException) TableName (org.apache.hadoop.hbase.TableName)32 TreeMap (java.util.TreeMap)29 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)28 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)28 WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit)27