
Example 16 with RegionScanner

use of org.apache.hadoop.hbase.regionserver.RegionScanner in project cdap by caskdata.

the class IncrementSummingScannerTest method testIncrementScanning.

@Test
public void testIncrementScanning() throws Exception {
    TableId tableId = TableId.from(NamespaceId.DEFAULT.getNamespace(), "TestIncrementSummingScanner");
    byte[] familyBytes = Bytes.toBytes("f");
    byte[] columnBytes = Bytes.toBytes("c");
    HRegion region = createRegion(tableId, familyBytes);
    try {
        region.initialize();
        // test handling of a single increment value alone
        Put p = new Put(Bytes.toBytes("r1"));
        p.add(familyBytes, columnBytes, Bytes.toBytes(3L));
        p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        region.put(p);
        Scan scan = new Scan();
        RegionScanner scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.USER_SCAN);
        List<Cell> results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(1, results.size());
        Cell cell = results.get(0);
        assertNotNull(cell);
        assertEquals(3L, Bytes.toLong(cell.getValue()));
        // test handling of a single total sum
        p = new Put(Bytes.toBytes("r2"));
        p.add(familyBytes, columnBytes, Bytes.toBytes(5L));
        region.put(p);
        scan = new Scan(Bytes.toBytes("r2"));
        scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.USER_SCAN);
        results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(1, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals(5L, Bytes.toLong(cell.getValue()));
        // test handling of multiple increment values
        long now = System.currentTimeMillis();
        p = new Put(Bytes.toBytes("r3"));
        for (int i = 0; i < 5; i++) {
            p.add(familyBytes, columnBytes, now - i, Bytes.toBytes((long) (i + 1)));
        }
        p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        region.put(p);
        scan = new Scan(Bytes.toBytes("r3"));
        scan.setMaxVersions();
        scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.USER_SCAN);
        results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(1, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals(15L, Bytes.toLong(cell.getValue()));
        // test handling of multiple increment values followed by a total sum, then other increments
        now = System.currentTimeMillis();
        p = new Put(Bytes.toBytes("r4"));
        for (int i = 0; i < 3; i++) {
            p.add(familyBytes, columnBytes, now - i, Bytes.toBytes(1L));
        }
        p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        region.put(p);
        // this put will appear as a "total" sum prior to all the delta puts
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, columnBytes, now - 5, Bytes.toBytes(5L));
        region.put(p);
        scan = new Scan(Bytes.toBytes("r4"));
        scan.setMaxVersions();
        scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.USER_SCAN);
        results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(1, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals(8L, Bytes.toLong(cell.getValue()));
        // test handling of an increment column followed by a non-increment column
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, Bytes.toBytes("c2"), Bytes.toBytes("value"));
        region.put(p);
        scan = new Scan(Bytes.toBytes("r4"));
        scan.setMaxVersions();
        scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.USER_SCAN);
        results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(2, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals(8L, Bytes.toLong(cell.getValue()));
        cell = results.get(1);
        assertNotNull(cell);
        assertEquals("value", Bytes.toString(cell.getValue()));
        // test handling of an increment column followed by a delete
        now = System.currentTimeMillis();
        Delete d = new Delete(Bytes.toBytes("r5"));
        d.deleteColumn(familyBytes, columnBytes, now - 3);
        region.delete(d);
        p = new Put(Bytes.toBytes("r5"));
        for (int i = 2; i >= 0; i--) {
            p.add(familyBytes, columnBytes, now - i, Bytes.toBytes(1L));
        }
        p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        region.put(p);
        scan = new Scan(Bytes.toBytes("r5"));
        scan.setMaxVersions();
        scan.setRaw(true);
        scanner = new IncrementSummingScanner(region, -1, region.getScanner(scan), ScanType.COMPACT_RETAIN_DELETES);
        results = Lists.newArrayList();
        scanner.next(results);
        // the raw scan with COMPACT_RETAIN_DELETES keeps the delete marker, so both the
        // summed cell and the delete are returned (a user scan would not see the delete marker)
        assertEquals(2, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals(3L, Bytes.toLong(cell.getValue(), IncrementHandlerState.DELTA_MAGIC_PREFIX.length, 8));
        // next cell should be the delete
        cell = results.get(1);
        assertTrue(CellUtil.isDelete(cell));
    } finally {
        region.close();
    }
}
Also used : TableId(co.cask.cdap.data2.util.TableId) Delete(org.apache.hadoop.hbase.client.Delete) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put) Test(org.junit.Test) HBase11Test(co.cask.cdap.data.hbase.HBase11Test)
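
The test above issues its delta writes directly against the HRegion. For reference, a minimal client-side sketch of the same DELTA_WRITE pattern using the standard HBase 1.x client API might look as follows; the table name "counters" and the method name writeDelta are illustrative assumptions, while HBaseTable.DELTA_WRITE and TRUE are the same constants used in the test.

// Sketch only (not part of the CDAP tests): issuing a delta write through the
// ordinary HBase client API. Table name "counters" and family/qualifier are assumptions.
void writeDelta(Configuration conf) throws IOException {
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(TableName.valueOf("counters"))) {
        Put delta = new Put(Bytes.toBytes("r1"));
        // the increment amount is encoded as an 8-byte long, exactly as in the test
        delta.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes(3L));
        // flag the put as a delta so IncrementSummingScanner will sum it with other deltas
        delta.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        table.put(delta);
    }
}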

Example 17 with RegionScanner

use of org.apache.hadoop.hbase.regionserver.RegionScanner in project cdap by caskdata.

the class IncrementHandler method preGetOp.

@Override
public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> ctx, Get get, List<Cell> results) throws IOException {
    Scan scan = new Scan(get);
    scan.setMaxVersions();
    scan.setFilter(Filters.combine(new IncrementFilter(), scan.getFilter()));
    RegionScanner scanner = null;
    try {
        scanner = new IncrementSummingScanner(region, scan.getBatch(), region.getScanner(scan), ScanType.USER_SCAN);
        scanner.next(results);
        ctx.bypass();
    } finally {
        if (scanner != null) {
            scanner.close();
        }
    }
}
Also used : RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scan(org.apache.hadoop.hbase.client.Scan)
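
Because preGetOp rewrites the Get as a max-versions Scan filtered by IncrementFilter and then bypasses the default get, an ordinary client read observes only the summed total. A hedged client-side sketch of that observable behavior, assuming a Table handle for the coprocessor-enabled table and the 8-byte long encoding used throughout these examples:

// Sketch only: client-side view once the IncrementHandler coprocessor is installed.
long readCounter(Table table) throws IOException {
    Get get = new Get(Bytes.toBytes("r1"));
    get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"));
    Result result = table.get(get);
    byte[] value = result.getValue(Bytes.toBytes("f"), Bytes.toBytes("c"));
    // individual delta cells have already been summed server-side by IncrementSummingScanner
    return value == null ? 0L : Bytes.toLong(value);
}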

Example 18 with RegionScanner

use of org.apache.hadoop.hbase.regionserver.RegionScanner in project cdap by caskdata.

the class IncrementSummingScannerTest method testWithBatchLimit.

@Test
public void testWithBatchLimit() throws Exception {
    TableId tableId = TableId.from(NamespaceId.DEFAULT.getNamespace(), "testWithBatchLimit");
    byte[] familyBytes = Bytes.toBytes("f");
    byte[] columnBytes = Bytes.toBytes("c2");
    HRegion region = createRegion(tableId, familyBytes);
    try {
        region.initialize();
        long now = System.currentTimeMillis();
        // put a non-increment column
        Put p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, Bytes.toBytes("c1"), Bytes.toBytes("value1"));
        region.put(p);
        // now put some increment deltas in a column
        p = new Put(Bytes.toBytes("r4"));
        for (int i = 0; i < 3; i++) {
            p.add(familyBytes, columnBytes, now - i, Bytes.toBytes(1L));
        }
        p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
        region.put(p);
        // put some non-increment columns
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, Bytes.toBytes("c3"), Bytes.toBytes("value3"));
        region.put(p);
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, Bytes.toBytes("c4"), Bytes.toBytes("value4"));
        region.put(p);
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, Bytes.toBytes("c5"), Bytes.toBytes("value5"));
        region.put(p);
        // this put will appear as a "total" sum prior to all the delta puts
        p = new Put(Bytes.toBytes("r4"));
        p.add(familyBytes, columnBytes, now - 5, Bytes.toBytes(5L));
        region.put(p);
        Scan scan = new Scan(Bytes.toBytes("r4"));
        scan.setMaxVersions();
        RegionScanner scanner = new IncrementSummingScanner(region, 3, region.getScanner(scan), ScanType.USER_SCAN);
        List<Cell> results = Lists.newArrayList();
        scanner.next(results);
        assertEquals(3, results.size());
        Cell cell = results.get(0);
        assertNotNull(cell);
        assertEquals("value1", Bytes.toString(cell.getValue()));
        cell = results.get(1);
        assertNotNull(cell);
        assertEquals(8L, Bytes.toLong(cell.getValue()));
        cell = results.get(2);
        assertNotNull(cell);
        assertEquals("value3", Bytes.toString(cell.getValue()));
    } finally {
        region.close();
    }
}
Also used : TableId(co.cask.cdap.data2.util.TableId) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put) HBase12CDH570Test(co.cask.cdap.data.hbase.HBase12CDH570Test) Test(org.junit.Test)
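
With a batch limit of 3, a single next() call returns only the first three cells of the row (the non-increment column, the summed counter, and the next column). A hedged sketch of draining the remaining cells, assuming the standard InternalScanner contract in which next() returns true while more data remains:

// Sketch only: draining a batch-limited scanner with repeated next() calls.
List<Cell> batch = Lists.newArrayList();
boolean more;
do {
    batch.clear();
    more = scanner.next(batch);
    for (Cell c : batch) {
        // process each cell; printing the qualifier here is purely illustrative
        System.out.println(Bytes.toString(CellUtil.cloneQualifier(c)));
    }
} while (more);
scanner.close();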

Example 19 with RegionScanner

use of org.apache.hadoop.hbase.regionserver.RegionScanner in project cdap by caskdata.

the class IncrementSummingScannerTest method testMultiColumnFlushAndCompact.

@Test
public void testMultiColumnFlushAndCompact() throws Exception {
    TableId tableId = TableId.from(NamespaceId.DEFAULT.getNamespace(), "testMultiColumnFlushAndCompact");
    byte[] familyBytes = Bytes.toBytes("f");
    byte[] columnBytes = Bytes.toBytes("c");
    byte[] columnBytes2 = Bytes.toBytes("c2");
    HRegion region = createRegion(tableId, familyBytes);
    try {
        region.initialize();
        long now = 1;
        byte[] row1 = Bytes.toBytes("row1");
        byte[] row2 = Bytes.toBytes("row2");
        // Initial put to row1,c2
        Put row1P = new Put(row1);
        row1P.add(familyBytes, columnBytes2, now - 1, Bytes.toBytes(5L));
        region.put(row1P);
        // Initial put to row2,c
        Put row2P = new Put(row2);
        row2P.add(familyBytes, columnBytes, now - 1, Bytes.toBytes(10L));
        region.put(row2P);
        // Generate some increments
        long ts = now;
        for (int i = 0; i < 50; i++) {
            region.put(generateIncrementPut(familyBytes, columnBytes, row1, ts));
            region.put(generateIncrementPut(familyBytes, columnBytes, row2, ts));
            region.put(generateIncrementPut(familyBytes, columnBytes2, row1, ts));
            ts++;
        }
        // First scanner represents the flush scanner
        RegionScanner scanner = new IncrementSummingScanner(region, -1, region.getScanner(new Scan().setMaxVersions()), ScanType.COMPACT_RETAIN_DELETES, now + 15, -1);
        // Second scanner is a user scan; this makes the asserts easier
        scanner = new IncrementSummingScanner(region, -1, scanner, ScanType.USER_SCAN);
        List<Cell> results = Lists.newArrayList();
        assertTrue(scanner.next(results, ScannerContext.newBuilder().setBatchLimit(10).build()));
        assertEquals(2, results.size());
        Cell cell = results.get(0);
        assertNotNull(cell);
        assertEquals("row1", Bytes.toString(cell.getRow()));
        assertEquals("c", Bytes.toString(cell.getQualifier()));
        assertEquals(50, Bytes.toLong(cell.getValue()));
        cell = results.get(1);
        assertNotNull(cell);
        assertEquals("row1", Bytes.toString(cell.getRow()));
        assertEquals("c2", Bytes.toString(cell.getQualifier()));
        assertEquals(55, Bytes.toLong(cell.getValue()));
        results.clear();
        assertFalse(scanner.next(results, ScannerContext.newBuilder().setBatchLimit(10).build()));
        assertEquals(1, results.size());
        cell = results.get(0);
        assertNotNull(cell);
        assertEquals("row2", Bytes.toString(cell.getRow()));
        assertEquals(60, Bytes.toLong(cell.getValue()));
    } finally {
        region.close();
    }
}
Also used : TableId(co.cask.cdap.data2.util.TableId) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put) HBase12CDH570Test(co.cask.cdap.data.hbase.HBase12CDH570Test) Test(org.junit.Test)
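
The generateIncrementPut helper used above is not included in this excerpt. Based on the delta-write pattern in the other tests (a single +1 delta at an explicit timestamp, flagged as DELTA_WRITE), a plausible reconstruction is sketched below; the actual CDAP helper may differ.

// Hypothetical reconstruction of the helper, not taken from the CDAP sources.
private Put generateIncrementPut(byte[] familyBytes, byte[] columnBytes, byte[] row, long ts) {
    Put p = new Put(row);
    // each generated put carries a delta of 1 at the given timestamp
    p.add(familyBytes, columnBytes, ts, Bytes.toBytes(1L));
    p.setAttribute(HBaseTable.DELTA_WRITE, TRUE);
    return p;
}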

Example 20 with RegionScanner

use of org.apache.hadoop.hbase.regionserver.RegionScanner in project phoenix by apache.

the class RegionScannerFactory method getWrappedScanner.

/**
 * Returns a wrapped scanner that catches unexpected exceptions (i.e. Phoenix bugs) and
 * re-throws them as DoNotRetryIOException, so that needless retrying does not hang the
 * query for 30 seconds. Unfortunately, until HBASE-7481 gets fixed, there is no way to do
 * the same from a custom filter.
 * @param arrayKVRefs
 * @param arrayFuncRefs
 * @param offset starting position in the rowkey.
 * @param scan
 * @param tupleProjector
 * @param dataRegion
 * @param indexMaintainer
 * @param tx current transaction
 * @param viewConstants
 */
public RegionScanner getWrappedScanner(final RegionCoprocessorEnvironment env, final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, final int offset, final Scan scan, final ColumnReference[] dataColumns, final TupleProjector tupleProjector, final Region dataRegion, final IndexMaintainer indexMaintainer, PhoenixTransactionContext tx, final byte[][] viewConstants, final KeyValueSchema kvSchema, final ValueBitSet kvSchemaBitSet, final TupleProjector projector, final ImmutableBytesWritable ptr, final boolean useQualifierAsListIndex) {
    return new RegionScanner() {

        private boolean hasReferences = checkForReferenceFiles();

        private HRegionInfo regionInfo = env.getRegionInfo();

        private byte[] actualStartKey = getActualStartKey();

        // If there are any reference files after a local index region merge, in some cases we
        // might get records less than the scan start row key. This happens when we replace the
        // actual region start key with the merged region's start key. This method returns
        // whether there are any reference files in the region.
        private boolean checkForReferenceFiles() {
            if (!ScanUtil.isLocalIndex(scan))
                return false;
            for (byte[] family : scan.getFamilies()) {
                if (getRegion().getStore(family).hasReferences()) {
                    return true;
                }
            }
            return false;
        }

        // Get the actual scan start row of the local index. This is used to detect result rows
        // whose keys sort before the scan start row when reference files are present.
        public byte[] getActualStartKey() {
            return ScanUtil.isLocalIndex(scan) ? ScanUtil.getActualStartRow(scan, regionInfo) : null;
        }

        @Override
        public boolean next(List<Cell> results) throws IOException {
            try {
                return s.next(results);
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        @Override
        public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
            throw new IOException("Next with scannerContext should not be called in Phoenix environment");
        }

        @Override
        public void close() throws IOException {
            s.close();
        }

        @Override
        public HRegionInfo getRegionInfo() {
            return s.getRegionInfo();
        }

        @Override
        public boolean isFilterDone() throws IOException {
            return s.isFilterDone();
        }

        @Override
        public boolean reseek(byte[] row) throws IOException {
            return s.reseek(row);
        }

        @Override
        public long getMvccReadPoint() {
            return s.getMvccReadPoint();
        }

        @Override
        public boolean nextRaw(List<Cell> result) throws IOException {
            try {
                boolean next = s.nextRaw(result);
                Cell arrayElementCell = null;
                if (result.size() == 0) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                if (ScanUtil.isLocalIndex(scan) && !ScanUtil.isAnalyzeTable(scan)) {
                    if (hasReferences && actualStartKey != null) {
                        next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, null, arrayElementCell);
                        if (result.isEmpty()) {
                            return next;
                        }
                    }
                    /* In the following, c is only used when the data region is null.
                       dataRegion will never be null in the case of a non-coprocessor call,
                       therefore there is no need to refactor. */
                    IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
                }
                if (projector != null) {
                    Tuple toProject = useQualifierAsListIndex ? new PositionBasedResultTuple(result) : new ResultTuple(Result.create(result));
                    Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
                    result.clear();
                    result.add(tuple.getValue(0));
                    if (arrayElementCell != null) {
                        result.add(arrayElementCell);
                    }
                }
                // A scan attribute is set to retrieve the specific array element
                return next;
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        @Override
        public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws IOException {
            boolean res = next(result);
            ScannerContextUtil.incrementSizeProgress(scannerContext, result);
            ScannerContextUtil.updateTimeProgress(scannerContext);
            return res;
        }

        /**
         * When there is a merge in progress while scanning local indexes we might get key values less than the scan start row.
         * In that case we need to keep scanning until we reach a row key greater than or equal to the scan start key.
         * TODO try to fix this case in LocalIndexStoreFileScanner when there is a merge.
         */
        private boolean scanTillScanStartRow(final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result, ScannerContext scannerContext, Cell arrayElementCell) throws IOException {
            boolean next = true;
            Cell firstCell = result.get(0);
            while (Bytes.compareTo(firstCell.getRowArray(), firstCell.getRowOffset(), firstCell.getRowLength(), actualStartKey, 0, actualStartKey.length) < 0) {
                result.clear();
                if (scannerContext == null) {
                    next = s.nextRaw(result);
                } else {
                    next = s.nextRaw(result, scannerContext);
                }
                if (result.isEmpty()) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                firstCell = result.get(0);
            }
            return next;
        }

        private int replaceArrayIndexElement(final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result) {
            // make a copy of the results array here, as we're modifying it below
            MultiKeyValueTuple tuple = new MultiKeyValueTuple(ImmutableList.copyOf(result));
            // The size of both the arrays would be same?
            // Using KeyValueSchema to set and retrieve the value
            // collect the first kv to get the row
            Cell rowKv = result.get(0);
            for (KeyValueColumnExpression kvExp : arrayKVRefs) {
                if (kvExp.evaluate(tuple, ptr)) {
                    ListIterator<Cell> itr = result.listIterator();
                    while (itr.hasNext()) {
                        Cell kv = itr.next();
                        if (Bytes.equals(kvExp.getColumnFamily(), 0, kvExp.getColumnFamily().length, kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength()) && Bytes.equals(kvExp.getColumnQualifier(), 0, kvExp.getColumnQualifier().length, kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength())) {
                            // remove the kv that has the full array values.
                            itr.remove();
                            break;
                        }
                    }
                }
            }
            byte[] value = kvSchema.toBytes(tuple, arrayFuncRefs, kvSchemaBitSet, ptr);
            // Add a dummy kv with the exact value of the array index
            result.add(new KeyValue(rowKv.getRowArray(), rowKv.getRowOffset(), rowKv.getRowLength(), QueryConstants.ARRAY_VALUE_COLUMN_FAMILY, 0, QueryConstants.ARRAY_VALUE_COLUMN_FAMILY.length, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER, 0, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER.length, HConstants.LATEST_TIMESTAMP, KeyValue.Type.codeToType(rowKv.getTypeByte()), value, 0, value.length));
            return result.size() - 1;
        }

        @Override
        public long getMaxResultSize() {
            return s.getMaxResultSize();
        }

        @Override
        public int getBatch() {
            return s.getBatch();
        }
    };
}
Also used : Set(java.util.Set) ValueBitSet(org.apache.phoenix.schema.ValueBitSet) IOException(java.io.IOException) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) KeyValueColumnExpression(org.apache.phoenix.expression.KeyValueColumnExpression) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext)
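
The catch blocks above delegate to ServerUtil.throwIOException so that unexpected errors surface as DoNotRetryIOException rather than leaving the client retrying for 30 seconds. A hedged sketch of the essential rethrow pattern, using only the standard org.apache.hadoop.hbase.DoNotRetryIOException; the helper name below is illustrative, and the real Phoenix ServerUtil may additionally inspect or unwrap the cause.

// Sketch only: convert an unexpected error into a non-retriable IOException.
static void rethrowAsNonRetriable(String regionName, Throwable t) throws IOException {
    if (t instanceof DoNotRetryIOException) {
        // already non-retriable, propagate as-is
        throw (DoNotRetryIOException) t;
    }
    throw new DoNotRetryIOException(regionName + ": " + t.getMessage(), t);
}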

Aggregations

RegionScanner (org.apache.hadoop.hbase.regionserver.RegionScanner) 97
Scan (org.apache.hadoop.hbase.client.Scan) 75
Cell (org.apache.hadoop.hbase.Cell) 59
ArrayList (java.util.ArrayList) 35
Test (org.junit.Test) 35
Put (org.apache.hadoop.hbase.client.Put) 33
HRegion (org.apache.hadoop.hbase.regionserver.HRegion) 25
Region (org.apache.hadoop.hbase.regionserver.Region) 20
List (java.util.List) 18
TableId (co.cask.cdap.data2.util.TableId) 17
IOException (java.io.IOException) 14
Delete (org.apache.hadoop.hbase.client.Delete) 14
RegionCoprocessorEnvironment (org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment) 12
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) 12
KeyValue (org.apache.hadoop.hbase.KeyValue) 11
Configuration (org.apache.hadoop.conf.Configuration) 9
ColumnReference (org.apache.phoenix.hbase.index.covered.update.ColumnReference) 9
PMetaDataEntity (org.apache.phoenix.schema.PMetaDataEntity) 9
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 8
Result (org.apache.hadoop.hbase.client.Result) 6