Search in sources :

Example 36 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class TestScannersWithFilters method testFirstKeyOnlyFilter.

/**
 * Scans with a {@link FirstKeyOnlyFilter} and checks that exactly the first
 * KeyValue of each of the six expected rows comes back.
 */
@Test
public void testFirstKeyOnlyFilter() throws Exception {
    Scan scan = new Scan();
    scan.setFilter(new FirstKeyOnlyFilter());
    // One expected KV per row: three rows from group one, then three rows from group two.
    KeyValue[] expected = new KeyValue[] {
        new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
        new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
        new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
        new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
        new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
        new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1])
    };
    verifyScanFull(scan, expected);
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)

Example 37 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class TextSortReducer method reduce.

/**
 * Parses the TSV lines received for one row key into KeyValues and writes
 * them out in {@code CellComparator.COMPARATOR} order.
 *
 * <p>Lines are consumed in batches: a batch ends when the input is exhausted
 * or when the accumulated {@code KeyValue.heapSize()} reaches the value of
 * {@code reducer.row.threshold} (default {@code 1 << 30}). Each batch is
 * sorted and written independently.
 *
 * @param rowKey  key under which every KeyValue of this call is written
 * @param lines   raw TSV lines for the row
 * @param context reducer context used for configuration, status and output
 * @throws java.io.IOException  if a line cannot be parsed and
 *         {@code skipBadLines} is false (the parse failure is the cause)
 * @throws InterruptedException declared for {@code context.write}
 */
@Override
protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> context) throws java.io.IOException, InterruptedException {
    // reduce() is invoked once per row, but a pathological row may still be
    // too large to hold at once, hence the size cap per batch.
    final long maxBatchSize = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30));
    final Iterator<Text> remaining = lines.iterator();
    while (remaining.hasNext()) {
        final Set<KeyValue> sorted = new TreeSet<>(CellComparator.COMPARATOR);
        long batchSize = 0;
        // Fill the batch until the input runs out or the size cap is hit.
        while (remaining.hasNext() && batchSize < maxBatchSize) {
            final Text line = remaining.next();
            final byte[] lineBytes = line.getBytes();
            try {
                final ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength());
                // Per-line metadata: timestamp (falls back to the current ts), visibility and TTL.
                ts = parsed.getTimestamp(ts);
                cellVisibilityExpr = parsed.getCellVisibility();
                ttl = parsed.getCellTTL();
                // Tags shared by every cell produced from this line.
                final List<Tag> tags = new ArrayList<>();
                if (cellVisibilityExpr != null) {
                    tags.addAll(kvCreator.getVisibilityExpressionResolver().createVisibilityExpTags(cellVisibilityExpr));
                }
                if (ttl > 0) {
                    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl)));
                }
                final int columnCount = parsed.getColumnCount();
                for (int col = 0; col < columnCount; col++) {
                    // Only data columns become cells; the row key, timestamp, attributes,
                    // visibility and TTL columns are metadata.
                    if (col != parser.getRowKeyColumnIndex() && col != parser.getTimestampKeyColumnIndex() && col != parser.getAttributesKeyColumnIndex() && col != parser.getCellVisibilityColumnIndex() && col != parser.getCellTTLColumnIndex()) {
                        // The KV is written directly to HFiles, so it is built through the
                        // KVCreator facade.
                        final Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(col), 0, parser.getFamily(col).length, parser.getQualifier(col), 0, parser.getQualifier(col).length, ts, lineBytes, parsed.getColumnOffset(col), parsed.getColumnLength(col), tags);
                        final KeyValue keyValue = KeyValueUtil.ensureKeyValue(cell);
                        sorted.add(keyValue);
                        batchSize += keyValue.heapSize();
                    }
                }
            } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException | InvalidLabelException badLine) {
                if (!skipBadLines) {
                    throw new IOException(badLine);
                }
                // Tolerated bad line: report it, count it, and move on to the next line.
                System.err.println("Bad line." + badLine.getMessage());
                incrementBadLineCount(1);
            }
        }
        final String readStatus = "Read " + sorted.size() + " entries of " + sorted.getClass() + "(" + StringUtils.humanReadableInt(batchSize) + ")";
        context.setStatus(readStatus);
        int written = 0;
        for (KeyValue keyValue : sorted) {
            context.write(rowKey, keyValue);
            written++;
            // Progress update every 100 writes.
            if (written > 0 && written % 100 == 0) {
                context.setStatus("Wrote " + written + " key values.");
            }
        }
        // More lines remain for this row: force a flush, because sorted order
        // cannot be guaranteed across batches of the same row.
        if (remaining.hasNext()) {
            context.write(null, null);
        }
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) InvalidLabelException(org.apache.hadoop.hbase.security.visibility.InvalidLabelException) TreeSet(java.util.TreeSet) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)

Example 38 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class PutCombiner method reduce.

/**
 * Merges the Puts received for one row into as few Puts as possible.
 *
 * <p>The first Put of a group becomes the accumulator; the cells of each
 * later Put are folded into its family map. Once the heap size of the
 * folded-in cells exceeds {@code putcombiner.row.threshold} (default
 * {@code 1 << 30}) the accumulator is written out and a new group starts.
 *
 * @param row     row key the Puts belong to
 * @param vals    Puts to combine
 * @param context context used for configuration, status and output
 * @throws IOException          declared for {@code context.write}
 * @throws InterruptedException declared for {@code context.write}
 */
@Override
protected void reduce(K row, Iterable<Put> vals, Context context) throws IOException, InterruptedException {
    // HeapSize gives an upper bound on the memory held by the merged puts, so
    // part of the content can be flushed while looping. A flush may yield
    // several Puts for the same row key; that is fine, since a combiner is
    // only an optimization and perfect grouping is not required.
    final long flushThreshold = context.getConfiguration().getLong("putcombiner.row.threshold", 1L * (1 << 30));
    int combinedCount = 0;
    long mergedSize = 0;
    Put accumulator = null;
    Map<byte[], List<Cell>> accumulatorFamilies = null;
    for (Put incoming : vals) {
        combinedCount++;
        if (accumulator == null) {
            // First Put of a group: adopt it as the accumulator.
            accumulator = incoming;
            accumulatorFamilies = accumulator.getFamilyCellMap();
            continue;
        }
        for (Entry<byte[], List<Cell>> familyEntry : incoming.getFamilyCellMap().entrySet()) {
            final List<Cell> existing = accumulatorFamilies.get(familyEntry.getKey());
            for (Cell cell : familyEntry.getValue()) {
                final KeyValue keyValue = KeyValueUtil.ensureKeyValue(cell);
                mergedSize += keyValue.heapSize();
                if (existing != null) {
                    existing.add(keyValue);
                }
            }
            // Family not present in the accumulator yet: adopt the incoming list as is.
            if (existing == null) {
                accumulatorFamilies.put(familyEntry.getKey(), familyEntry.getValue());
            }
        }
        if (combinedCount % 10 == 0) {
            context.setStatus("Combine " + combinedCount);
        }
        if (mergedSize > flushThreshold) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Combined %d Put(s) into %d.", combinedCount, 1));
            }
            context.write(row, accumulator);
            // Start a fresh group with the next Put.
            accumulator = null;
            mergedSize = 0;
            combinedCount = 0;
        }
    }
    // Emit whatever is still pending once the input is exhausted.
    if (accumulator != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Combined %d Put(s) into %d.", combinedCount, 1));
        }
        context.write(row, accumulator);
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) List(java.util.List) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put)

Example 39 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class DefaultMemStore method main.

/**
 * Code to help figure out whether our approximation of object heap sizes is
 * close enough. See hbase-900. Fills memstores, logs their estimated sizes,
 * then waits so the user can take a heap dump and open the resulting hprof
 * in something like jprofiler, which can report the 'deep size' of objects.
 *
 * <p>The wait lasts 30 seconds; if the thread is interrupted during the
 * wait, the interrupt flag is restored and the method exits early.
 *
 * @param args main args (unused)
 */
public static void main(String[] args) {
    RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
    LOG.info("vmName=" + runtime.getVmName() + ", vmVendor=" + runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
    LOG.info("vmInputArguments=" + runtime.getInputArguments());
    DefaultMemStore memstore1 = new DefaultMemStore();
    // TODO: x32 vs x64
    final int count = 10000;
    byte[] fam = Bytes.toBytes("col");
    byte[] qf = Bytes.toBytes("umn");
    byte[] empty = new byte[0];
    MemstoreSize memstoreSize = new MemstoreSize();
    for (int i = 0; i < count; i++) {
        // Give each its own ts
        memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
    }
    LOG.info("memstore1 estimated size=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    // Load the same keys a second time into the same memstore and size accumulator.
    for (int i = 0; i < count; i++) {
        memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty), memstoreSize);
    }
    LOG.info("memstore1 estimated size (2nd loading of same data)=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    // Make a variably sized memstore: the i-th value is i bytes long.
    DefaultMemStore memstore2 = new DefaultMemStore();
    memstoreSize = new MemstoreSize();
    for (int i = 0; i < count; i++) {
        memstore2.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, new byte[i]), memstoreSize);
    }
    LOG.info("memstore2 estimated size=" + (memstoreSize.getDataSize() + memstoreSize.getHeapSize()));
    final int seconds = 30;
    LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
    // Actually pause for the announced interval; previously the method logged
    // the wait but exited immediately, leaving no time to take the heap dump.
    try {
        Thread.sleep(seconds * 1000L);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and fall through to exit.
        Thread.currentThread().interrupt();
        LOG.info("Interrupted while waiting for heap dump");
    }
    LOG.info("Exiting.");
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) RuntimeMXBean(java.lang.management.RuntimeMXBean)

Example 40 with KeyValue

use of org.apache.hadoop.hbase.KeyValue in project hbase by apache.

the class HMobStore method resolve.

/**
 * Resolves a mob reference cell to the cell stored in the mob file.
 *
 * <p>The candidate directories for a table (its mob family path and its
 * store archive path) are looked up in {@code map}, and computed and stored
 * there under a per-table id lock when absent. If the reference is not a
 * valid mob reference, carries no table-name tag, or {@code readCell}
 * yields {@code null}, a warning is logged and a new KeyValue with the
 * reference's row, family, qualifier, timestamp, type and tags but an empty
 * value is returned instead.
 *
 * @param reference The cell found in HBase; its value is a path to a mob file.
 * @param cacheBlocks Whether the scanner should cache blocks.
 * @param readPt The read point.
 * @param readEmptyValueOnMobCellMiss Passed through to {@code readCell};
 *        whether to return a null value when the mob file is missing or corrupt.
 * @return The cell found in the mob file, or the empty-valued substitute; never null.
 * @throws IOException declared by this method; not thrown directly here, so
 *         presumably raised while reading the mob file.
 */
public Cell resolve(Cell reference, boolean cacheBlocks, long readPt, boolean readEmptyValueOnMobCellMiss) throws IOException {
    Cell resolved = null;
    if (MobUtils.hasValidMobRefCellValue(reference)) {
        final String mobFileName = MobUtils.getMobFileName(reference);
        final Tag tableTag = MobUtils.getTableNameTag(reference);
        if (tableTag != null) {
            final String tableKey = TagUtil.getValueAsString(tableTag);
            List<Path> searchPaths = map.get(tableKey);
            if (searchPaths == null) {
                // Not cached yet: take the per-table lock and look again before computing.
                final IdLock.Entry lockEntry = keyLock.getLockEntry(tableKey.hashCode());
                try {
                    searchPaths = map.get(tableKey);
                    if (searchPaths == null) {
                        searchPaths = new ArrayList<>(2);
                        final TableName tn = TableName.valueOf(tableKey);
                        final Path mobFamilyPath = MobUtils.getMobFamilyPath(conf, tn, family.getNameAsString());
                        searchPaths.add(mobFamilyPath);
                        final Path archivePath = HFileArchiveUtil.getStoreArchivePath(conf, tn, MobUtils.getMobRegionInfo(tn).getEncodedName(), family.getNameAsString());
                        searchPaths.add(archivePath);
                        map.put(tableKey, searchPaths);
                    }
                } finally {
                    keyLock.releaseLockEntry(lockEntry);
                }
            }
            resolved = readCell(searchPaths, mobFileName, reference, cacheBlocks, readPt, readEmptyValueOnMobCellMiss);
        }
    }
    if (resolved != null) {
        return resolved;
    }
    // Nothing could be read: hand back a copy of the reference with an empty value.
    LOG.warn("The KeyValue result is null, assemble a new KeyValue with the same row,family," + "qualifier,timestamp,type and tags but with an empty value to return.");
    return new KeyValue(reference.getRowArray(), reference.getRowOffset(), reference.getRowLength(), reference.getFamilyArray(), reference.getFamilyOffset(), reference.getFamilyLength(), reference.getQualifierArray(), reference.getQualifierOffset(), reference.getQualifierLength(), reference.getTimestamp(), Type.codeToType(reference.getTypeByte()), HConstants.EMPTY_BYTE_ARRAY, 0, 0, reference.getTagsArray(), reference.getTagsOffset(), reference.getTagsLength());
}
Also used : Path(org.apache.hadoop.fs.Path) IdLock(org.apache.hadoop.hbase.util.IdLock) TableName(org.apache.hadoop.hbase.TableName) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag) Cell(org.apache.hadoop.hbase.Cell)

Aggregations

KeyValue (org.apache.hadoop.hbase.KeyValue)552 Test (org.junit.Test)289 Cell (org.apache.hadoop.hbase.Cell)193 ArrayList (java.util.ArrayList)172 Put (org.apache.hadoop.hbase.client.Put)98 Scan (org.apache.hadoop.hbase.client.Scan)85 Result (org.apache.hadoop.hbase.client.Result)70 Configuration (org.apache.hadoop.conf.Configuration)64 Path (org.apache.hadoop.fs.Path)55 ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag)36 Tag (org.apache.hadoop.hbase.Tag)35 ByteBuffer (java.nio.ByteBuffer)34 List (java.util.List)34 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)34 IOException (java.io.IOException)32 TableName (org.apache.hadoop.hbase.TableName)32 TreeMap (java.util.TreeMap)29 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)28 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)28 WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit)27