Search in sources :

Example 1 with BytesComparator

Use of org.apache.phoenix.util.ScanUtil.BytesComparator in the Apache Phoenix project.

The method create of the class ScanRanges.

/**
 * Builds a {@code ScanRanges} from the per-slot key ranges of a query.
 *
 * <p>Two shared instances are returned without further work: {@code EVERYTHING} when there are
 * no slots beyond the salt bytes and {@code minMaxRange} is {@code KeyRange.EVERYTHING_RANGE},
 * and {@code NOTHING} when {@code minMaxRange}, the only slot, or the final scan range is
 * {@code KeyRange.EMPTY_RANGE}. All of these checks are identity ({@code ==}) comparisons
 * against the sentinel instances.
 *
 * <p>When {@code isPointLookup(...)} reports a point lookup, the slots are collapsed into a
 * single slot of full point keys (filtered by {@code minMaxRange} with the salt bytes ignored),
 * and {@code schema}, {@code slotSpan} and {@code useSkipScan} are recomputed for that shape.
 *
 * @param schema row key schema; replaced by {@code SchemaUtil.VAR_BINARY_SCHEMA} for some point lookups
 * @param ranges key ranges, one list per slot; never modified (copies are sorted)
 * @param slotSpan span of each slot, passed through to the {@code ScanUtil} helpers
 * @param minMaxRange additional bound intersected with the computed scan range
 * @param nBuckets salt bucket count, or {@code null}, in which case no salt-byte offset is applied
 * @param useSkipScan requested skip-scan flag; overridden for point lookups
 * @param rowTimestampColIndex forwarded to {@code getRowTimestampColumnRange}
 * @return the resulting scan ranges, or one of the shared {@code EVERYTHING}/{@code NOTHING} instances
 */
public static ScanRanges create(RowKeySchema schema, List<List<KeyRange>> ranges, int[] slotSpan, KeyRange minMaxRange, Integer nBuckets, boolean useSkipScan, int rowTimestampColIndex) {
    // Number of leading salt bytes to skip; zero when no bucket count was supplied.
    final int saltLength = nBuckets != null ? SaltingUtil.NUM_SALTING_BYTES : 0;
    final int slotCount = ranges.size();

    // Trivial outcomes first.
    if (slotCount == saltLength && minMaxRange == KeyRange.EVERYTHING_RANGE) {
        return EVERYTHING;
    }
    if (minMaxRange == KeyRange.EMPTY_RANGE) {
        return NOTHING;
    }
    if (slotCount == 1 + saltLength) {
        List<KeyRange> onlySlot = ranges.get(saltLength);
        if (onlySlot.size() == 1 && onlySlot.get(0) == KeyRange.EMPTY_RANGE) {
            return NOTHING;
        }
    }

    TimeRange rowTimestampRange = getRowTimestampColumnRange(ranges, schema, rowTimestampColIndex);
    final boolean pointLookup = isPointLookup(schema, ranges, slotSpan, useSkipScan);
    if (pointLookup) {
        // TODO: consider keeping original to use for serialization as it would be smaller?
        List<byte[]> pointKeys = ScanRanges.getPointKeys(ranges, slotSpan, schema, nBuckets);
        List<KeyRange> retained = Lists.newArrayListWithExpectedSize(pointKeys.size());

        // The point keys are compared below starting after the salt bytes, so the bounds of
        // minMaxRange get the same prefix stripped.
        KeyRange unsaltedMinMaxRange = minMaxRange;
        if (nBuckets != null && minMaxRange != KeyRange.EVERYTHING_RANGE) {
            // NOTE(review): the unbound flags are passed as the boolean following each bound;
            // confirm this matches what KeyRange.getKeyRange expects for those parameters.
            unsaltedMinMaxRange = KeyRange.getKeyRange(
                    stripPrefix(minMaxRange.getLowerRange(), saltLength),
                    minMaxRange.lowerUnbound(),
                    stripPrefix(minMaxRange.getUpperRange(), saltLength),
                    minMaxRange.upperUnbound());
        }

        // We have full keys here, so use the field from the var-binary schema.
        BytesComparator comparator = ScanUtil.getComparator(SchemaUtil.VAR_BINARY_SCHEMA.getField(0));
        for (byte[] pointKey : pointKeys) {
            // Compare only the portion of the key that follows the salt bytes.
            int unsaltedLength = pointKey.length - saltLength;
            if (unsaltedMinMaxRange.compareLowerToUpperBound(pointKey, saltLength, unsaltedLength, true, comparator) > 0) {
                continue;
            }
            if (unsaltedMinMaxRange.compareUpperToLowerBound(pointKey, saltLength, unsaltedLength, true, comparator) < 0) {
                continue;
            }
            retained.add(KeyRange.getKeyRange(pointKey));
        }

        ranges = Collections.singletonList(retained);
        useSkipScan = retained.size() > 1;

        if (pointKeys.size() <= 1 && SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), false, schema.getField(schema.getFieldCount() - 1)) != QueryConstants.DESC_SEPARATOR_BYTE) {
            // Single key whose last field does not use the descending separator: keep the
            // original schema and let the one slot span all of its fields.
            slotSpan = new int[] { schema.getMaxFields() - 1 };
        } else {
            // Several keys, or a last field that uses the descending separator byte (which is
            // not part of the value): treat the keys as plain var-binary.
            schema = SchemaUtil.VAR_BINARY_SCHEMA;
            slotSpan = ScanUtil.SINGLE_COLUMN_SLOT_SPAN;
        }
    }

    // Sort a copy of every slot and freeze it.
    List<List<KeyRange>> sortedRanges = Lists.newArrayListWithExpectedSize(ranges.size());
    for (List<KeyRange> slot : ranges) {
        List<KeyRange> ordered = Lists.newArrayList(slot);
        Collections.sort(ordered, KeyRange.COMPARATOR);
        sortedRanges.add(ImmutableList.copyOf(ordered));
    }

    // Original author's note: the scan range is left at EVERYTHING for a salted skip-scan point
    // lookup because setting it causes issues during intersect by going across region boundaries.
    KeyRange scanRange = KeyRange.EVERYTHING_RANGE;
    final boolean saltedSkipScanPointLookup = nBuckets != null && pointLookup && useSkipScan;
    if (!saltedSkipScanPointLookup) {
        byte[] minKey = ScanUtil.getMinKey(schema, sortedRanges, slotSpan);
        byte[] maxKey = ScanUtil.getMaxKey(schema, sortedRanges, slotSpan);
        // If the max key crosses the prefix (salt byte) boundary of the min key, there is
        // nothing to filter at the upper end of the range.
        if (ScanUtil.crossesPrefixBoundary(maxKey, ScanUtil.getPrefix(minKey, saltLength), saltLength)) {
            maxKey = KeyRange.UNBOUND;
        }
        // Nothing to filter at the low end either if the min key is just the salt bytes.
        if (minKey.length <= saltLength) {
            minKey = KeyRange.UNBOUND;
        }
        scanRange = KeyRange.getKeyRange(minKey, maxKey);
    }

    if (minMaxRange != KeyRange.EVERYTHING_RANGE) {
        minMaxRange = ScanUtil.convertToInclusiveExclusiveRange(minMaxRange, schema, new ImmutableBytesWritable());
        scanRange = scanRange.intersect(minMaxRange);
    }
    if (scanRange == KeyRange.EMPTY_RANGE) {
        return NOTHING;
    }
    return new ScanRanges(schema, slotSpan, sortedRanges, scanRange, minMaxRange, useSkipScan, pointLookup, nBuckets, rowTimestampRange);
}
Also used : TimeRange(org.apache.hadoop.hbase.io.TimeRange) BytesComparator(org.apache.phoenix.util.ScanUtil.BytesComparator) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) KeyRange(org.apache.phoenix.query.KeyRange) FilterList(org.apache.hadoop.hbase.filter.FilterList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List)

Example 2 with BytesComparator

Use of org.apache.phoenix.util.ScanUtil.BytesComparator in the Apache Phoenix project.

The method navigate of the class SkipScanFilter.

/**
 * Decides how the row key in {@code currentKey[offset, offset + length)} relates to the key
 * ranges held in {@code slots}, walking the key one slot at a time and advancing the per-slot
 * cursor {@code position} as it goes.
 *
 * <p>Outcomes visible in this method:
 * <ul>
 *   <li>{@code getIncludeReturnCode()} when the key is still below the recorded end key, or
 *       when every examined slot has a range containing its part of the key;</li>
 *   <li>{@code ReturnCode.SEEK_NEXT_USING_HINT} when the key is outside the current ranges and
 *       a later key must be sought (the hint is prepared through {@code setStartKey});</li>
 *   <li>{@code ReturnCode.NEXT_ROW} when {@code isDone} is, or becomes, {@code true}.</li>
 * </ul>
 *
 * <p>Side effects: updates {@code position}, {@code ptr}, {@code endKeyLength} and
 * {@code isDone}, and calls {@code setStartKey}/{@code setEndKey}.
 *
 * @param currentKey buffer holding the row key being examined
 * @param offset start of the row key within {@code currentKey}
 * @param length length of the row key
 * @param terminate when {@code Terminate.AT}, a slot running out of ranges returns
 *        {@code SEEK_NEXT_USING_HINT} immediately instead of backtracking
 * @return the filter return code for the examined key
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "QBA_QUESTIONABLE_BOOLEAN_ASSIGNMENT", justification = "Assignment designed to work this way.")
private ReturnCode navigate(final byte[] currentKey, final int offset, final int length, Terminate terminate) {
    int nSlots = slots.size();
    // First check to see if we're in-range until we reach our end key
    if (endKeyLength > 0) {
        if (Bytes.compareTo(currentKey, offset, length, endKey, 0, endKeyLength) < 0) {
            return getIncludeReturnCode();
        }
        // The key has reached the end key. If the last slot is positioned at a single key,
        // move that slot on to its next position now,
        // since we know we'll be past the current row after including it.
        if (slots.get(nSlots - 1).get(position[nSlots - 1]).isSingleKey()) {
            if (nextPosition(nSlots - 1) < 0) {
                // nextPosition returned a negative value, presumably meaning no positions
                // remain, so mark the filter as done.
                // NOTE(review): the original comment here read "Current row will be included,
                // but we have no more"; confirm that against the NEXT_ROW returned below.
                isDone = true;
                return ReturnCode.NEXT_ROW;
            }
        } else {
            // Reset the positions to zero from the next slot after the earliest ranged slot, since the
            // next key could be bigger at this ranged slot, and smaller than the current position of
            // less significant slots.
            int earliestRangeIndex = nSlots - 1;
            for (int i = 0; i < nSlots; i++) {
                if (!slots.get(i).get(position[i]).isSingleKey()) {
                    earliestRangeIndex = i;
                    break;
                }
            }
            Arrays.fill(position, earliestRangeIndex + 1, position.length, 0);
        }
    }
    // The recorded end key no longer applies; it is set again by setEndKey at the bottom.
    endKeyLength = 0;
    // isDone may already have been set by an earlier call, in which case nothing more can match.
    if (isDone) {
        return ReturnCode.NEXT_ROW;
    }
    // i is the slot being examined; seek becomes true once we know a seek hint will be issued.
    int i = 0;
    boolean seek = false;
    int earliestRangeIndex = nSlots - 1;
    // minOffset/maxOffset bound the buffer that ptr iterates over: currentKey to begin with,
    // startKey after backtracking (see below).
    int minOffset = offset;
    int maxOffset = schema.iterator(currentKey, minOffset, length, ptr);
    // Position ptr on the part of the key that belongs to slot 0.
    schema.next(ptr, ScanUtil.getRowKeyPosition(slotSpan, i), maxOffset, slotSpan[i]);
    while (true) {
        // Comparator depends on field in schema
        BytesComparator comparator = ScanUtil.getComparator(schema.getField(ScanUtil.getRowKeyPosition(slotSpan, i)));
        // Increment to the next range while the upper bound of our current slot is less than our current key
        while (position[i] < slots.get(i).size() && slots.get(i).get(position[i]).compareUpperToLowerBound(ptr, comparator) < 0) {
            position[i]++;
        }
        // Every slot after i starts again from its first range.
        Arrays.fill(position, i + 1, position.length, 0);
        if (position[i] >= slots.get(i).size()) {
            // Slot i has no range at or above the current key.
            // If navigating to current key, just return
            if (terminate == Terminate.AT) {
                return ReturnCode.SEEK_NEXT_USING_HINT;
            }
            // Running out of ranges in the first slot means nothing further can match.
            if (i == 0) {
                isDone = true;
                return ReturnCode.NEXT_ROW;
            }
            // Increment key and backtrack until in range. We know at this point that we'll be
            // issuing a seek next hint.
            seek = true;
            Arrays.fill(position, i, position.length, 0);
            int j = i - 1;
            // If we're positioned at a single key, no need to copy the current key and get the next key .
            // Instead, just increment to the next key and continue.
            // The loop condition deliberately assigns inside the expression (hence the findbugs
            // suppression on this method): for a single-key slot it sets incremented and advances
            // position[j], wrapping to 0; only a wrap-around enters the body, which moves on to
            // the previous slot.
            boolean incremented = false;
            while (j >= 0 && slots.get(j).get(position[j]).isSingleKey() && (incremented = true) && (position[j] = (position[j] + 1) % slots.get(j).size()) == 0) {
                j--;
                incremented = false;
            }
            // Every earlier slot wrapped around: there is no next key to seek to.
            if (j < 0) {
                isDone = true;
                return ReturnCode.NEXT_ROW;
            }
            if (incremented) {
                // Continue the loop after setting the start key, because our start key maybe smaller than
                // the current key, so we'll end up incrementing the start key until it's bigger than the
                // current key.
                setStartKey();
                schema.reposition(ptr, ScanUtil.getRowKeyPosition(slotSpan, i), ScanUtil.getRowKeyPosition(slotSpan, j), minOffset, maxOffset, slotSpan[j]);
            } else {
                //for PHOENIX-3705, now ptr is still point to slot i, we must make ptr point to slot j+1,
                //because following setStartKey method will copy rowKey columns before ptr to startKey and
                //then copy the lower bound of slots from j+1, according to position array, so if we do not
                //make ptr point to slot j+1 before setStartKey,the startKey would be erroneous.
                schema.reposition(ptr, ScanUtil.getRowKeyPosition(slotSpan, i), ScanUtil.getRowKeyPosition(slotSpan, j + 1), minOffset, maxOffset, slotSpan[j + 1]);
                int currentLength = setStartKey(ptr, minOffset, j + 1, nSlots, false);
                // From here on, we use startKey as our buffer (resetting minOffset and maxOffset)
                // We've copied the part of the current key above that we need into startKey
                // Reinitialize the iterator to be positioned at previous slot position
                minOffset = 0;
                maxOffset = startKeyLength;
                //make ptr point to the first rowKey column of slot j,why we need slotSpan[j] because for Row Value Constructor(RVC),
                //slot j may span multiple rowKey columns, so the length of ptr must consider the slotSpan[j].
                schema.iterator(startKey, minOffset, maxOffset, ptr, ScanUtil.getRowKeyPosition(slotSpan, j) + 1, slotSpan[j]);
                // Do nextKey after setting the accessor b/c otherwise the null byte may have
                // been incremented causing us not to find it
                ByteUtil.nextKey(startKey, currentLength);
            }
            // Resume the comparison from the slot we backtracked to.
            i = j;
        } else if (slots.get(i).get(position[i]).compareLowerToUpperBound(ptr, comparator) > 0) {
            // Our current key is less than the lower range of the current position in the current slot.
            // Seek to the lower range, since it's bigger than the current key
            setStartKey(ptr, minOffset, i, nSlots, false);
            return ReturnCode.SEEK_NEXT_USING_HINT;
        } else {
            // We're in range, check the next slot
            if (!slots.get(i).get(position[i]).isSingleKey() && i < earliestRangeIndex) {
                earliestRangeIndex = i;
            }
            // Stop once the last slot has been checked, or when a seek is already pending
            // (per the original note, we might otherwise
            // have a null byte that was incremented which screws up our schema.next call)
            if (i == nSlots - 1 || seek) {
                break;
            }
            i++;
            // If we run out of slots in our key, it means we have a partial key.
            if (schema.next(ptr, ScanUtil.getRowKeyPosition(slotSpan, i), maxOffset, slotSpan[i]) == null) {
                // If allTrailingNulls(i) holds, stop and treat the partial key as in range:
                // presumably the remaining slots only match null, and per the original note
                // that's the case (since we don't store trailing nulls).
                if (allTrailingNulls(i)) {
                    break;
                }
                // Otherwise we seek to the next start key because we're before it now
                setStartKey(ptr, minOffset, i, nSlots, true);
                return ReturnCode.SEEK_NEXT_USING_HINT;
            }
        }
    }
    // A seek became necessary while backtracking; the start key was prepared by the setStartKey calls above.
    if (seek) {
        return ReturnCode.SEEK_NEXT_USING_HINT;
    }
    // Else, we're in range for all slots and can include this row plus all rows 
    // up to the upper range of our last slot. We do this for ranges and single keys
    // since we potentially have multiple key values for the same row key.
    setEndKey(ptr, minOffset, i);
    return getIncludeReturnCode();
}
Also used : BytesComparator(org.apache.phoenix.util.ScanUtil.BytesComparator)

Aggregations

BytesComparator (org.apache.phoenix.util.ScanUtil.BytesComparator): 2, ImmutableList (com.google.common.collect.ImmutableList): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, FilterList (org.apache.hadoop.hbase.filter.FilterList): 1, ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 1, TimeRange (org.apache.hadoop.hbase.io.TimeRange): 1, KeyRange (org.apache.phoenix.query.KeyRange): 1