Example 21 with InternalScanner

use of org.apache.hadoop.hbase.regionserver.InternalScanner in project hbase by apache.

the class AggregateImplementation method getMin.

/**
   * Gives the minimum for a given combination of column qualifier and column
   * family, in the row range defined by the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). If the column qualifier is null, the minimum value for the
   * entire column family is returned.
   */
@Override
public void getMin(RpcController controller, AggregateRequest request, RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    T min = null;
    try {
        ColumnInterpreter<T, S, P, Q, R> ci = constructColumnInterpreterFromRequest(request);
        T temp;
        Scan scan = ProtobufUtil.toScan(request.getScan());
        scanner = env.getRegion().getScanner(scan);
        List<Cell> results = new ArrayList<>();
        byte[] colFamily = scan.getFamilies()[0];
        NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
        byte[] qualifier = null;
        if (qualifiers != null && !qualifiers.isEmpty()) {
            qualifier = qualifiers.pollFirst();
        }
        boolean hasMoreRows = false;
        do {
            hasMoreRows = scanner.next(results);
            int listSize = results.size();
            for (int i = 0; i < listSize; i++) {
                temp = ci.getValue(colFamily, qualifier, results.get(i));
                min = (min == null || (temp != null && ci.compare(temp, min) < 0)) ? temp : min;
            }
            results.clear();
        } while (hasMoreRows);
        if (min != null) {
            response = AggregateResponse.newBuilder().addFirstPart(ci.getProtoForCellType(min).toByteString()).build();
        }
    } catch (IOException e) {
        CoprocessorRpcUtils.setControllerException(controller, e);
    } finally {
        if (scanner != null) {
            try {
                scanner.close();
            } catch (IOException ignored) {
            }
        }
    }
    log.info("Minimum from this region is " + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + min);
    done.run(response);
}
Also used: InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner), AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
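
For context, here is a minimal client-side sketch of driving this endpoint through AggregationClient with the stock LongColumnInterpreter. The table, family, and qualifier names are hypothetical, and the exact AggregationClient signatures (TableName vs. byte[] table name, Closeable or not) vary slightly across HBase versions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.client.coprocessor.LongColumnInterpreter;
import org.apache.hadoop.hbase.util.Bytes;

public class MinExample {
    public static void main(String[] args) throws Throwable {
        Configuration conf = HBaseConfiguration.create();
        // Assumes the AggregateImplementation coprocessor is loaded on the table.
        AggregationClient aggregationClient = new AggregationClient(conf);
        try {
            // Restrict the scan to one family and one qualifier, as the endpoint expects.
            Scan scan = new Scan();
            scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"));
            Long min = aggregationClient.min(TableName.valueOf("mytable"),
                new LongColumnInterpreter(), scan);
            System.out.println("min = " + min);
        } finally {
            aggregationClient.close();
        }
    }
}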

Example 22 with InternalScanner

use of org.apache.hadoop.hbase.regionserver.InternalScanner in project hbase by apache.

the class AggregateImplementation method getMax.

/**
   * Gives the maximum for a given combination of column qualifier and column
   * family, in the row range defined by the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). If the column qualifier is null, the maximum value for the
   * entire column family is returned.
   */
@Override
public void getMax(RpcController controller, AggregateRequest request, RpcCallback<AggregateResponse> done) {
    InternalScanner scanner = null;
    AggregateResponse response = null;
    T max = null;
    try {
        ColumnInterpreter<T, S, P, Q, R> ci = constructColumnInterpreterFromRequest(request);
        T temp;
        Scan scan = ProtobufUtil.toScan(request.getScan());
        scanner = env.getRegion().getScanner(scan);
        List<Cell> results = new ArrayList<>();
        byte[] colFamily = scan.getFamilies()[0];
        NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
        byte[] qualifier = null;
        if (qualifiers != null && !qualifiers.isEmpty()) {
            qualifier = qualifiers.pollFirst();
        }
        // qualifier can be null.
        boolean hasMoreRows = false;
        do {
            hasMoreRows = scanner.next(results);
            int listSize = results.size();
            for (int i = 0; i < listSize; i++) {
                temp = ci.getValue(colFamily, qualifier, results.get(i));
                max = (max == null || (temp != null && ci.compare(temp, max) > 0)) ? temp : max;
            }
            results.clear();
        } while (hasMoreRows);
        if (max != null) {
            AggregateResponse.Builder builder = AggregateResponse.newBuilder();
            builder.addFirstPart(ci.getProtoForCellType(max).toByteString());
            response = builder.build();
        }
    } catch (IOException e) {
        CoprocessorRpcUtils.setControllerException(controller, e);
    } finally {
        if (scanner != null) {
            try {
                scanner.close();
            } catch (IOException ignored) {
            }
        }
    }
    log.info("Maximum from this region is " + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + max);
    done.run(response);
}
Also used: InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner), AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
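
The per-region maxima this endpoint returns combine trivially on the client: the global maximum is just the maximum of the regional maxima. A hedged sketch of that reduction (the perRegionMaxima list is hypothetical; AggregationClient performs the equivalent fold internally):

import java.util.Arrays;
import java.util.List;

public class MaxReduction {
    // Folds per-region maxima into a global maximum, skipping regions
    // that returned no value (a null response, e.g. from an empty region).
    static Long globalMax(List<Long> perRegionMaxima) {
        Long max = null;
        for (Long regionMax : perRegionMaxima) {
            if (regionMax != null && (max == null || regionMax.compareTo(max) > 0)) {
                max = regionMax;
            }
        }
        return max;
    }

    public static void main(String[] args) {
        System.out.println(globalMax(Arrays.asList(7L, null, 42L, 13L))); // prints 42
    }
}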

Example 23 with InternalScanner

use of org.apache.hadoop.hbase.regionserver.InternalScanner in project hbase by apache.

the class AggregateImplementation method getAvg.

/**
   * Gives a Pair whose first element is the sum and whose second element is
   * the row count, computed for a given combination of column qualifier and
   * column family in the row range defined by the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). If the column qualifier is null, an aggregate sum over the
   * entire column family is returned.
   * <p>
   * The average is computed in
   * AggregationClient#avg(byte[], ColumnInterpreter, Scan) by processing
   * results from all regions, so it is fine for each region to return just
   * the sum and a Long row count.
   */
@Override
public void getAvg(RpcController controller, AggregateRequest request, RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    try {
        ColumnInterpreter<T, S, P, Q, R> ci = constructColumnInterpreterFromRequest(request);
        S sumVal = null;
        long rowCountVal = 0L;
        Scan scan = ProtobufUtil.toScan(request.getScan());
        scanner = env.getRegion().getScanner(scan);
        byte[] colFamily = scan.getFamilies()[0];
        NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
        byte[] qualifier = null;
        if (qualifiers != null && !qualifiers.isEmpty()) {
            qualifier = qualifiers.pollFirst();
        }
        List<Cell> results = new ArrayList<>();
        boolean hasMoreRows = false;
        do {
            results.clear();
            hasMoreRows = scanner.next(results);
            int listSize = results.size();
            for (int i = 0; i < listSize; i++) {
                sumVal = ci.add(sumVal, ci.castToReturnType(ci.getValue(colFamily, qualifier, results.get(i))));
            }
            rowCountVal++;
        } while (hasMoreRows);
        if (sumVal != null) {
            ByteString first = ci.getProtoForPromotedType(sumVal).toByteString();
            AggregateResponse.Builder pair = AggregateResponse.newBuilder();
            pair.addFirstPart(first);
            ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal);
            bb.rewind();
            pair.setSecondPart(ByteString.copyFrom(bb));
            response = pair.build();
        }
    } catch (IOException e) {
        CoprocessorRpcUtils.setControllerException(controller, e);
    } finally {
        if (scanner != null) {
            try {
                scanner.close();
            } catch (IOException ignored) {
            }
        }
    }
    done.run(response);
}
Also used: InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner), ByteString (com.google.protobuf.ByteString), AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
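
As the Javadoc notes, the division happens on the client once every region has reported its (sum, rowCount) pair. A hedged sketch of that final step, in the spirit of AggregationClient#avg (the per-region pair list is hypothetical):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.util.Pair;

public class AvgReduction {
    // Adds up the per-region partial sums and row counts, then divides once,
    // so rounding happens only in the final floating-point division.
    static double avg(List<Pair<Long, Long>> sumAndCountPerRegion) {
        long totalSum = 0L;
        long totalRows = 0L;
        for (Pair<Long, Long> p : sumAndCountPerRegion) {
            totalSum += p.getFirst();
            totalRows += p.getSecond();
        }
        return (double) totalSum / totalRows;
    }

    public static void main(String[] args) {
        // Two regions: (sum=10, rows=4) and (sum=20, rows=6) -> 30 / 10 = 3.0
        System.out.println(avg(Arrays.asList(
            new Pair<>(10L, 4L), new Pair<>(20L, 6L))));
    }
}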

Example 24 with InternalScanner

use of org.apache.hadoop.hbase.regionserver.InternalScanner in project hbase by apache.

the class AggregateImplementation method getSum.

/**
   * Gives the sum for a given combination of column qualifier and column
   * family, in the row range defined by the Scan object. In its current
   * implementation, it takes one column family and one column qualifier (if
   * provided). If the column qualifier is null, the sum for the entire column
   * family is returned.
   */
@Override
public void getSum(RpcController controller, AggregateRequest request, RpcCallback<AggregateResponse> done) {
    AggregateResponse response = null;
    InternalScanner scanner = null;
    S sumVal = null;
    try {
        ColumnInterpreter<T, S, P, Q, R> ci = constructColumnInterpreterFromRequest(request);
        T temp;
        Scan scan = ProtobufUtil.toScan(request.getScan());
        scanner = env.getRegion().getScanner(scan);
        byte[] colFamily = scan.getFamilies()[0];
        NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
        byte[] qualifier = null;
        if (qualifiers != null && !qualifiers.isEmpty()) {
            qualifier = qualifiers.pollFirst();
        }
        List<Cell> results = new ArrayList<>();
        boolean hasMoreRows = false;
        do {
            hasMoreRows = scanner.next(results);
            int listSize = results.size();
            for (int i = 0; i < listSize; i++) {
                temp = ci.getValue(colFamily, qualifier, results.get(i));
                if (temp != null)
                    sumVal = ci.add(sumVal, ci.castToReturnType(temp));
            }
            results.clear();
        } while (hasMoreRows);
        if (sumVal != null) {
            response = AggregateResponse.newBuilder().addFirstPart(ci.getProtoForPromotedType(sumVal).toByteString()).build();
        }
    } catch (IOException e) {
        CoprocessorRpcUtils.setControllerException(controller, e);
    } finally {
        if (scanner != null) {
            try {
                scanner.close();
            } catch (IOException ignored) {
            }
        }
    }
    log.debug("Sum from this region is " + env.getRegion().getRegionInfo().getRegionNameAsString() + ": " + sum);
    done.run(response);
}
Also used: InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner), AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
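
Note how the endpoint reads scan.getFamilies()[0] and at most one qualifier: whether the sum covers a single column or the whole family is decided entirely by how the client builds the Scan. A minimal sketch of the two cases (family and qualifier names are hypothetical):

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class SumScans {
    public static void main(String[] args) {
        // Qualifier present: the endpoint sums that single column.
        Scan columnScan = new Scan();
        columnScan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"));

        // Family only: the qualifier set is empty on the server side,
        // so the endpoint sums every cell in the family.
        Scan familyScan = new Scan();
        familyScan.addFamily(Bytes.toBytes("cf"));
    }
}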

Example 25 with InternalScanner

use of org.apache.hadoop.hbase.regionserver.InternalScanner in project hbase by apache.

the class AggregateImplementation method getStd.

/**
   * Gives a Pair whose first element is a List containing the sum and the sum
   * of squares, and whose second element is the row count. It is computed for
   * a given combination of column qualifier and column family in the row range
   * defined by the Scan object. In its current implementation, it takes one
   * column family and one column qualifier (if provided). The idea is to get
   * the variance first (the average of the squares less the square of the
   * average); the standard deviation is the square root of the variance.
   */
@Override
public void getStd(RpcController controller, AggregateRequest request, RpcCallback<AggregateResponse> done) {
    InternalScanner scanner = null;
    AggregateResponse response = null;
    try {
        ColumnInterpreter<T, S, P, Q, R> ci = constructColumnInterpreterFromRequest(request);
        S sumVal = null, sumSqVal = null, tempVal = null;
        long rowCountVal = 0L;
        Scan scan = ProtobufUtil.toScan(request.getScan());
        scanner = env.getRegion().getScanner(scan);
        byte[] colFamily = scan.getFamilies()[0];
        NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(colFamily);
        byte[] qualifier = null;
        if (qualifiers != null && !qualifiers.isEmpty()) {
            qualifier = qualifiers.pollFirst();
        }
        List<Cell> results = new ArrayList<>();
        boolean hasMoreRows = false;
        do {
            tempVal = null;
            hasMoreRows = scanner.next(results);
            int listSize = results.size();
            for (int i = 0; i < listSize; i++) {
                tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily, qualifier, results.get(i))));
            }
            results.clear();
            sumVal = ci.add(sumVal, tempVal);
            sumSqVal = ci.add(sumSqVal, ci.multiply(tempVal, tempVal));
            rowCountVal++;
        } while (hasMoreRows);
        if (sumVal != null) {
            ByteString first_sumVal = ci.getProtoForPromotedType(sumVal).toByteString();
            ByteString first_sumSqVal = ci.getProtoForPromotedType(sumSqVal).toByteString();
            AggregateResponse.Builder pair = AggregateResponse.newBuilder();
            pair.addFirstPart(first_sumVal);
            pair.addFirstPart(first_sumSqVal);
            ByteBuffer bb = ByteBuffer.allocate(8).putLong(rowCountVal);
            bb.rewind();
            pair.setSecondPart(ByteString.copyFrom(bb));
            response = pair.build();
        }
    } catch (IOException e) {
        CoprocessorRpcUtils.setControllerException(controller, e);
    } finally {
        if (scanner != null) {
            try {
                scanner.close();
            } catch (IOException ignored) {
            }
        }
    }
    done.run(response);
}
Also used: InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner), ByteString (com.google.protobuf.ByteString), AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
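
The endpoint ships only the raw moments; the variance arithmetic described in the Javadoc runs on the client. A hedged sketch of that computation from the returned sum, sum of squares, and row count:

public class StdReduction {
    // variance = E[x^2] - (E[x])^2; the standard deviation is its square root.
    static double std(double sum, double sumOfSquares, long rowCount) {
        double avg = sum / rowCount;
        double avgOfSquares = sumOfSquares / rowCount;
        return Math.sqrt(avgOfSquares - avg * avg);
    }

    public static void main(String[] args) {
        // Values 1, 2, 3: sum = 6, sumSq = 14, n = 3 -> std ~ 0.816
        System.out.println(std(6, 14, 3));
    }
}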

Aggregations

InternalScanner (org.apache.hadoop.hbase.regionserver.InternalScanner): 44
ArrayList (java.util.ArrayList): 41
Cell (org.apache.hadoop.hbase.Cell): 36
Scan (org.apache.hadoop.hbase.client.Scan): 34
Test (org.junit.Test): 17
IOException (java.io.IOException): 15
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 12
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 12
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 9
Put (org.apache.hadoop.hbase.client.Put): 9
List (java.util.List): 7
AggregateResponse (org.apache.hadoop.hbase.protobuf.generated.AggregateProtos.AggregateResponse): 7
HRegion (org.apache.hadoop.hbase.regionserver.HRegion): 7
Region (org.apache.hadoop.hbase.regionserver.Region): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
KeyValue (org.apache.hadoop.hbase.KeyValue): 5
HashMap (java.util.HashMap): 4
ScanType (org.apache.hadoop.hbase.regionserver.ScanType): 4
StoreFileScanner (org.apache.hadoop.hbase.regionserver.StoreFileScanner): 4
ByteString (com.google.protobuf.ByteString): 3