Search in sources:

Example 1 with Aggregator

Use of org.apache.phoenix.expression.aggregator.Aggregator in project phoenix by apache.

Class QueryCompilerTest, method testCountAggregatorFirst.

@Test
public void testCountAggregatorFirst() throws Exception {
    String[] queries = new String[] {
        "SELECT sum(2.5),organization_id FROM atable GROUP BY organization_id,entity_id",
        "SELECT avg(a_integer) FROM atable GROUP BY organization_id,substr(entity_id,1,3),entity_id",
        "SELECT count(a_string) FROM atable GROUP BY substr(organization_id,1),entity_id",
        "SELECT min('foo') FROM atable GROUP BY entity_id,organization_id",
        "SELECT min('foo'),sum(a_integer),avg(2.5),4.5,max(b_string) FROM atable GROUP BY substr(organization_id,1),entity_id",
        "SELECT sum(2.5) FROM atable",
        "SELECT avg(a_integer) FROM atable",
        "SELECT count(a_string) FROM atable",
        "SELECT min('foo') FROM atable LIMIT 5",
        "SELECT min('foo'),sum(a_integer),avg(2.5),4.5,max(b_string) FROM atable" };
    List<Object> binds = Collections.emptyList();
    String query = null;
    try {
        for (int i = 0; i < queries.length; i++) {
            query = queries[i];
            Scan scan = compileQuery(query, binds);
            ServerAggregators aggregators = ServerAggregators.deserialize(scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), null);
            // The COUNT aggregator must always be ordered first among the server aggregators
            Aggregator aggregator = aggregators.getAggregators()[0];
            assertTrue(aggregator instanceof CountAggregator);
        }
    } catch (Exception e) {
        // Rethrow with the offending query so a failure identifies which statement broke
        throw new Exception(query, e);
    }
}
Also used: CountAggregator (org.apache.phoenix.expression.aggregator.CountAggregator), ServerAggregators (org.apache.phoenix.expression.aggregator.ServerAggregators), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), Scan (org.apache.hadoop.hbase.client.Scan), AmbiguousColumnException (org.apache.phoenix.schema.AmbiguousColumnException), SQLException (java.sql.SQLException), ColumnAlreadyExistsException (org.apache.phoenix.schema.ColumnAlreadyExistsException), ColumnNotFoundException (org.apache.phoenix.schema.ColumnNotFoundException), Test (org.junit.Test), BaseConnectionlessQueryTest (org.apache.phoenix.query.BaseConnectionlessQueryTest)
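
The test relies on Phoenix's pattern of shipping serialized aggregator state to the region server through a named HBase Scan attribute (here BaseScannerRegionObserver.AGGREGATORS). A minimal, self-contained sketch of that general round trip, using a hypothetical attribute name and payload rather than Phoenix's real serialization:

import org.apache.hadoop.hbase.client.Scan;
import java.nio.charset.StandardCharsets;

public class ScanAttributeExample {

    // Hypothetical attribute name, for illustration only
    static final String ATTR = "_CustomState";

    public static void main(String[] args) {
        Scan scan = new Scan();
        // Client side: attach serialized state to the scan before submitting it
        scan.setAttribute(ATTR, "count-first".getBytes(StandardCharsets.UTF_8));
        // Server side (e.g., inside a coprocessor): read the state back
        byte[] payload = scan.getAttribute(ATTR);
        System.out.println(new String(payload, StandardCharsets.UTF_8));
    }
}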

Example 2 with Aggregator

Use of org.apache.phoenix.expression.aggregator.Aggregator in project phoenix by apache.

Class SpillableGroupByCache, method getScanner.

@Override
public RegionScanner getScanner(final RegionScanner s) {
    final Iterator<Entry<ImmutableBytesWritable, Aggregator[]>> cacheIter = new EntryIterator();
    // Return a scanner backed by the spillable cache's entry iterator
    return new BaseRegionScanner(s) {

        @Override
        public void close() throws IOException {
            try {
                s.close();
            } finally {
                // Always close gbCache and swallow possible Exceptions
                Closeables.closeQuietly(SpillableGroupByCache.this);
            }
        }

        @Override
        public boolean next(List<Cell> results) throws IOException {
            if (!cacheIter.hasNext()) {
                return false;
            }
            Map.Entry<ImmutableBytesWritable, Aggregator[]> ce = cacheIter.next();
            ImmutableBytesWritable key = ce.getKey();
            Aggregator[] aggs = ce.getValue();
            byte[] value = aggregators.toBytes(aggs);
            if (logger.isDebugEnabled()) {
                // Use Arrays.asList so the aggregator contents are logged, not the array's identity hash
                logger.debug("Adding new distinct group: " + Bytes.toStringBinary(key.get(), key.getOffset(), key.getLength()) + " with aggregators " + Arrays.asList(aggs) + " value = " + Bytes.toStringBinary(value));
            }
            results.add(KeyValueUtil.newKeyValue(key.get(), key.getOffset(), key.getLength(), SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
            return cacheIter.hasNext();
        }
    };
}
Also used: Entry (java.util.Map.Entry), CacheEntry (org.apache.phoenix.cache.aggcache.SpillManager.CacheEntry), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), BaseRegionScanner (org.apache.phoenix.coprocessor.BaseRegionScanner), List (java.util.List), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map)
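
The scanner above is an adapter: an iterator over cached (key, aggregators) entries is exposed through the cursor-style next(List) contract of RegionScanner, whose boolean return value signals whether another call will yield more rows. A stripped-down sketch of that contract (EntryCursor and its names are illustrative, not Phoenix or HBase APIs):

import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Hypothetical adapter from an Iterator to a cursor-style next(List) API
final class EntryCursor<K, V> {

    private final Iterator<Map.Entry<K, V>> it;

    EntryCursor(Iterator<Map.Entry<K, V>> it) {
        this.it = it;
    }

    // Appends the next entry to results and, like getScanner()'s next(), returns
    // whether more entries remain after the one just added
    boolean next(List<Map.Entry<K, V>> results) {
        if (!it.hasNext()) {
            return false;
        }
        results.add(it.next());
        return it.hasNext();
    }
}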

Example 3 with Aggregator

Use of org.apache.phoenix.expression.aggregator.Aggregator in project phoenix by apache.

Class SpillableGroupByCache, method cache.

/**
 * Extracts an element from the cache. If the element is present neither in the in-memory cache
 * nor in the spill files, the cache performs an implicit put() of a new key/value tuple and
 * loads it into the cache.
 */
@Override
public Aggregator[] cache(ImmutableBytesPtr cacheKey) {
    ImmutableBytesPtr key = new ImmutableBytesPtr(cacheKey);
    Aggregator[] rowAggregators = cache.get(key);
    if (rowAggregators == null) {
        // Not found in the in-memory cache (one aggregator tuple is kept per distinct value);
        // check whether this key was spilled to disk before
        if (spillManager != null) {
            try {
                rowAggregators = spillManager.loadEntry(key);
            } catch (IOException ioe) {
                // Ensure that we always close and delete the temp files
                try {
                    throw new RuntimeException(ioe);
                } finally {
                    Closeables.closeQuietly(SpillableGroupByCache.this);
                }
            }
        }
        if (rowAggregators == null) {
            // No, key never spilled before, create a new tuple
            rowAggregators = aggregators.newAggregators(env.getConfiguration());
            if (logger.isDebugEnabled()) {
                logger.debug("Adding new aggregate bucket for row key " + Bytes.toStringBinary(key.get(), key.getOffset(), key.getLength()));
            }
        }
        if (cache.put(key, rowAggregators) == null) {
            totalNumElements++;
        }
    }
    return rowAggregators;
}
Also used: ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), IOException (java.io.IOException)
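
Stripped of the spill handling, the cache() contract above is get-or-create keyed by the group-by value: a miss implicitly installs fresh aggregator state. The same contract sketched with a plain map (SimpleGroupByCache and GroupByState are hypothetical stand-ins, not Phoenix classes):

import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for a per-group aggregator tuple
final class GroupByState {
    long count;
    long sum;
}

final class SimpleGroupByCache {

    private final Map<String, GroupByState> cache = new HashMap<>();

    // Get-or-create: a miss implicitly installs fresh state, mirroring cache()'s implicit put()
    GroupByState cache(String key) {
        return cache.computeIfAbsent(key, k -> new GroupByState());
    }

    void aggregate(String key, long value) {
        GroupByState state = cache(key);
        state.count++;
        state.sum += value;
    }
}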

Example 4 with Aggregator

Use of org.apache.phoenix.expression.aggregator.Aggregator in project phoenix by apache.

Class GroupedAggregateRegionObserver, method scanOrdered.

/**
 * Used for an aggregate query in which the key order matches the group by key order. In this
 * case, we can do the aggregation as we scan, by detecting when the group by key changes.
 * @param limit TODO
 * @throws IOException
 */
private RegionScanner scanOrdered(final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner scanner, final List<Expression> expressions, final ServerAggregators aggregators, final long limit) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug(LogUtil.addCustomAnnotations("Grouped aggregation over ordered rows with scan " + scan + ", group by " + expressions + ", aggregators " + aggregators, ScanUtil.getCustomAnnotations(scan)));
    }
    final Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
    final boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(minMaxQualifiers);
    return new BaseRegionScanner(scanner) {

        private long rowCount = 0;

        private ImmutableBytesPtr currentKey = null;

        @Override
        public boolean next(List<Cell> results) throws IOException {
            boolean hasMore;
            boolean atLimit;
            boolean aggBoundary = false;
            Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
            ImmutableBytesPtr key = null;
            Aggregator[] rowAggregators = aggregators.getAggregators();
            // If we're calculating no aggregate functions, we can exit at the
            // start of a new row. Otherwise, we have to wait until an aggregate
            // boundary before we can emit the row.
            int countOffset = rowAggregators.length == 0 ? 1 : 0;
            Region region = c.getEnvironment().getRegion();
            boolean acquiredLock = false;
            try {
                region.startRegionOperation();
                acquiredLock = true;
                synchronized (scanner) {
                    do {
                        List<Cell> kvs = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
                        // Results are potentially returned even when the return value of
                        // scanner.nextRaw() is false, since a false return only indicates
                        // that there are no more values after the ones returned
                        hasMore = scanner.nextRaw(kvs);
                        if (!kvs.isEmpty()) {
                            result.setKeyValues(kvs);
                            key = TupleUtil.getConcatenatedValue(result, expressions);
                            aggBoundary = currentKey != null && currentKey.compareTo(key) != 0;
                            if (!aggBoundary) {
                                aggregators.aggregate(rowAggregators, result);
                                if (logger.isDebugEnabled()) {
                                    logger.debug(LogUtil.addCustomAnnotations("Row passed filters: " + kvs + ", aggregated values: " + Arrays.asList(rowAggregators), ScanUtil.getCustomAnnotations(scan)));
                                }
                                currentKey = key;
                            }
                        }
                        atLimit = rowCount + countOffset >= limit;
                    // Do rowCount + 1 b/c we don't have to wait for a complete
                    // row in the case of a DISTINCT with a LIMIT
                    } while (hasMore && !aggBoundary && !atLimit);
                }
            } finally {
                if (acquiredLock)
                    region.closeRegionOperation();
            }
            if (currentKey != null) {
                byte[] value = aggregators.toBytes(rowAggregators);
                KeyValue keyValue = KeyValueUtil.newKeyValue(currentKey.get(), currentKey.getOffset(), currentKey.getLength(), SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length);
                results.add(keyValue);
                if (logger.isDebugEnabled()) {
                    logger.debug(LogUtil.addCustomAnnotations("Adding new aggregate row: " + keyValue + ",for current key " + Bytes.toStringBinary(currentKey.get(), currentKey.getOffset(), currentKey.getLength()) + ", aggregated values: " + Arrays.asList(rowAggregators), ScanUtil.getCustomAnnotations(scan)));
                }
                // If we hit a group boundary, reset the aggregators and seed them
                // with the row just read, which belongs to the next group (not to
                // the returned result).
                if (aggBoundary) {
                    aggregators.reset(rowAggregators);
                    aggregators.aggregate(rowAggregators, result);
                    currentKey = key;
                    rowCount++;
                    atLimit |= rowCount >= limit;
                }
            }
            // Continue if there are more
            if (!atLimit && (hasMore || aggBoundary)) {
                return true;
            }
            currentKey = null;
            return false;
        }
    };
}
Also used: EncodedColumnQualiferCellsList (org.apache.phoenix.schema.tuple.EncodedColumnQualiferCellsList), KeyValue (org.apache.hadoop.hbase.KeyValue), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), PInteger (org.apache.phoenix.schema.types.PInteger), PositionBasedMultiKeyValueTuple (org.apache.phoenix.schema.tuple.PositionBasedMultiKeyValueTuple), MultiKeyValueTuple (org.apache.phoenix.schema.tuple.MultiKeyValueTuple), Region (org.apache.hadoop.hbase.regionserver.Region), List (java.util.List), ArrayList (java.util.ArrayList), Cell (org.apache.hadoop.hbase.Cell), Tuple (org.apache.phoenix.schema.tuple.Tuple)
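
Because rows arrive in group-by key order, a group is complete the moment the concatenated key changes; that boundary detection is the core of scanOrdered(). The same idea reduced to a key-sorted stream of strings (OrderedGroupBy is an illustrative sketch, not Phoenix code):

import java.util.Iterator;

final class OrderedGroupBy {

    // Emits one (key, count) pair per run of equal keys in a key-sorted stream
    static void aggregate(Iterator<String> sortedKeys) {
        String currentKey = null;
        long count = 0;
        while (sortedKeys.hasNext()) {
            String key = sortedKeys.next();
            if (currentKey != null && !currentKey.equals(key)) {
                // Aggregation boundary: the previous group is complete, emit it
                System.out.println(currentKey + " -> " + count);
                count = 0;
            }
            currentKey = key;
            count++;
        }
        if (currentKey != null) {
            // Flush the final group once the stream is exhausted
            System.out.println(currentKey + " -> " + count);
        }
    }
}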

Example 5 with Aggregator

Use of org.apache.phoenix.expression.aggregator.Aggregator in project phoenix by apache.

Class RowKeyOrderedAggregateResultIterator, method advance.

@Override
protected Tuple advance() throws SQLException {
    Tuple current = this.next;
    boolean traversedIterators = nextTraversedIterators;
    if (current == null) {
        current = nextTuple();
        traversedIterators = this.traversedIterator;
    }
    if (current != null) {
        Tuple previous = current;
        Aggregator[] rowAggregators = null;
        while (true) {
            current = nextTuple();
            if (!traversedIterators || !continueAggregating(previous, current)) {
                break;
            }
            if (rowAggregators == null) {
                rowAggregators = aggregate(previous);
            }
            aggregators.aggregate(rowAggregators, current);
            traversedIterators = this.traversedIterator;
        }
        this.next = current;
        this.nextTraversedIterators = this.traversedIterator;
        if (rowAggregators == null) {
            current = previous;
        } else {
            byte[] value = aggregators.toBytes(rowAggregators);
            current = new SingleKeyValueTuple(KeyValueUtil.newKeyValue(previousKey, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length));
        }
    }
    if (current == null) {
        // Close underlying ResultIterators to free resources sooner rather than later
        close();
    }
    return current;
}
Also used: SingleKeyValueTuple (org.apache.phoenix.schema.tuple.SingleKeyValueTuple), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), Tuple (org.apache.phoenix.schema.tuple.Tuple)
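
advance() buffers one tuple ahead (this.next) so it can detect where a run of aggregatable tuples ends without losing the first tuple of the following run. That one-element lookahead pattern in isolation (PeekingIterator here is an illustrative sketch, not Phoenix's or Guava's class; it assumes the delegate never yields null):

import java.util.Iterator;
import java.util.NoSuchElementException;

// Minimal one-element lookahead wrapper
final class PeekingIterator<T> implements Iterator<T> {

    private final Iterator<T> delegate;
    // Buffered element, analogous to this.next in the iterator above
    private T buffered;

    PeekingIterator(Iterator<T> delegate) {
        this.delegate = delegate;
    }

    // Look at the next element without consuming it
    public T peek() {
        if (buffered == null && delegate.hasNext()) {
            buffered = delegate.next();
        }
        if (buffered == null) throw new NoSuchElementException();
        return buffered;
    }

    @Override
    public boolean hasNext() {
        return buffered != null || delegate.hasNext();
    }

    @Override
    public T next() {
        T result = peek();
        buffered = null; // consume the buffered element
        return result;
    }
}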

Aggregations

Aggregator (org.apache.phoenix.expression.aggregator.Aggregator): 11
Tuple (org.apache.phoenix.schema.tuple.Tuple): 6
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr): 5
List (java.util.List): 3
Cell (org.apache.hadoop.hbase.Cell): 3
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
Region (org.apache.hadoop.hbase.regionserver.Region): 3
EncodedColumnQualiferCellsList (org.apache.phoenix.schema.tuple.EncodedColumnQualiferCellsList): 3
MultiKeyValueTuple (org.apache.phoenix.schema.tuple.MultiKeyValueTuple): 3
PositionBasedMultiKeyValueTuple (org.apache.phoenix.schema.tuple.PositionBasedMultiKeyValueTuple): 3
IOException (java.io.IOException): 2
SQLException (java.sql.SQLException): 2
ArrayList (java.util.ArrayList): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
RegionCoprocessorEnvironment (org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment): 2
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 2
RegionScanner (org.apache.hadoop.hbase.regionserver.RegionScanner): 2
ServerAggregators (org.apache.phoenix.expression.aggregator.ServerAggregators): 2
SingleKeyValueTuple (org.apache.phoenix.schema.tuple.SingleKeyValueTuple): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1