Use of org.apache.phoenix.schema.ValueBitSet in project phoenix by apache.
Class ProjectionCompiler, method compile:
/**
* Builds the projection for the scan
* @param context query context kept between compilation of different query clauses
* @param statement TODO
* @param groupBy compiled GROUP BY clause
* @param targetColumns list of columns, parallel to aliasedNodes, that are being set for an
* UPSERT SELECT statement. Used to coerce expression types to the expected target type.
* @return projector used to access row values during scan
* @throws SQLException
*/
public static RowProjector compile(StatementContext context, SelectStatement statement, GroupBy groupBy, List<? extends PDatum> targetColumns, Expression where) throws SQLException {
List<KeyValueColumnExpression> arrayKVRefs = new ArrayList<KeyValueColumnExpression>();
List<ProjectedColumnExpression> arrayProjectedColumnRefs = new ArrayList<ProjectedColumnExpression>();
List<Expression> arrayKVFuncs = new ArrayList<Expression>();
List<Expression> arrayOldFuncs = new ArrayList<Expression>();
Map<Expression, Integer> arrayExpressionCounts = new HashMap<>();
List<AliasedNode> aliasedNodes = statement.getSelect();
// Setup projected columns in Scan
SelectClauseVisitor selectVisitor = new SelectClauseVisitor(context, groupBy, arrayKVRefs, arrayKVFuncs, arrayExpressionCounts, arrayProjectedColumnRefs, arrayOldFuncs, statement);
List<ExpressionProjector> projectedColumns = new ArrayList<ExpressionProjector>();
ColumnResolver resolver = context.getResolver();
TableRef tableRef = context.getCurrentTable();
PTable table = tableRef.getTable();
boolean resolveColumn = !tableRef.equals(resolver.getTables().get(0));
boolean isWildcard = false;
Scan scan = context.getScan();
int index = 0;
List<Expression> projectedExpressions = Lists.newArrayListWithExpectedSize(aliasedNodes.size());
List<byte[]> projectedFamilies = Lists.newArrayListWithExpectedSize(aliasedNodes.size());
for (AliasedNode aliasedNode : aliasedNodes) {
ParseNode node = aliasedNode.getNode();
// TODO: visitor?
if (node instanceof WildcardParseNode) {
if (statement.isAggregate()) {
ExpressionCompiler.throwNonAggExpressionInAggException(node.toString());
}
if (tableRef == TableRef.EMPTY_TABLE_REF) {
throw new SQLExceptionInfo.Builder(SQLExceptionCode.NO_TABLE_SPECIFIED_FOR_WILDCARD_SELECT).build().buildException();
}
isWildcard = true;
if (tableRef.getTable().getType() == PTableType.INDEX && ((WildcardParseNode) node).isRewrite()) {
projectAllIndexColumns(context, tableRef, resolveColumn, projectedExpressions, projectedColumns, targetColumns);
} else {
projectAllTableColumns(context, tableRef, resolveColumn, projectedExpressions, projectedColumns, targetColumns);
}
} else if (node instanceof TableWildcardParseNode) {
TableName tName = ((TableWildcardParseNode) node).getTableName();
TableRef tRef = resolver.resolveTable(tName.getSchemaName(), tName.getTableName());
if (tRef.equals(tableRef)) {
isWildcard = true;
}
if (tRef.getTable().getType() == PTableType.INDEX && ((TableWildcardParseNode) node).isRewrite()) {
projectAllIndexColumns(context, tRef, true, projectedExpressions, projectedColumns, targetColumns);
} else {
projectAllTableColumns(context, tRef, true, projectedExpressions, projectedColumns, targetColumns);
}
} else if (node instanceof FamilyWildcardParseNode) {
if (tableRef == TableRef.EMPTY_TABLE_REF) {
throw new SQLExceptionInfo.Builder(SQLExceptionCode.NO_TABLE_SPECIFIED_FOR_WILDCARD_SELECT).build().buildException();
}
// Project everything for SELECT cf.*
String cfName = ((FamilyWildcardParseNode) node).getName();
// Delay adding the projection to the scan: if any other column in the column family gets
// added to the scan first, it would overwrite the fact that we want to project the entire
// column family. Instead, we do the projection at the end.
// TODO: consider having a ScanUtil.addColumn and ScanUtil.addFamily to work
// around this, as this code depends on this function being the last place where
// columns are projected (which is currently true, but could change).
projectedFamilies.add(Bytes.toBytes(cfName));
if (tableRef.getTable().getType() == PTableType.INDEX && ((FamilyWildcardParseNode) node).isRewrite()) {
projectIndexColumnFamily(context, cfName, tableRef, resolveColumn, projectedExpressions, projectedColumns);
} else {
projectTableColumnFamily(context, cfName, tableRef, resolveColumn, projectedExpressions, projectedColumns);
}
} else {
Expression expression = node.accept(selectVisitor);
projectedExpressions.add(expression);
expression = coerceIfNecessary(index, targetColumns, expression);
if (node instanceof BindParseNode) {
context.getBindManager().addParamMetaData((BindParseNode) node, expression);
}
if (!node.isStateless()) {
if (!selectVisitor.isAggregate() && statement.isAggregate()) {
ExpressionCompiler.throwNonAggExpressionInAggException(expression.toString());
}
}
String columnAlias = aliasedNode.getAlias() != null ? aliasedNode.getAlias() : SchemaUtil.normalizeIdentifier(aliasedNode.getNode().getAlias());
boolean isCaseSensitive = aliasedNode.getAlias() != null ? aliasedNode.isCaseSensitve() : (columnAlias != null ? SchemaUtil.isCaseSensitive(aliasedNode.getNode().getAlias()) : selectVisitor.isCaseSensitive);
String name = columnAlias == null ? expression.toString() : columnAlias;
projectedColumns.add(new ExpressionProjector(name, tableRef.getTableAlias() == null ? (table.getName() == null ? "" : table.getName().getString()) : tableRef.getTableAlias(), expression, isCaseSensitive));
}
selectVisitor.reset();
index++;
}
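// Prune the array index candidates: entries whose recorded expression count is non-zero are
// removed from the parallel arrayKVRefs/arrayKVFuncs/arrayOldFuncs lists, leaving only the
// columns eligible for the server-side array index optimization.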
for (int i = arrayProjectedColumnRefs.size() - 1; i >= 0; i--) {
Expression expression = arrayProjectedColumnRefs.get(i);
Integer count = arrayExpressionCounts.get(expression);
if (count != 0) {
arrayKVRefs.remove(i);
arrayKVFuncs.remove(i);
arrayOldFuncs.remove(i);
}
}
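// If any candidates remain, serialize the array index information into the scan, build
// KeyValueSchema/ValueBitSet pairs for the array cells and the index functions, and rewrite
// the original array functions into ArrayIndexExpressions that read their values back by position.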
if (arrayKVFuncs.size() > 0 && arrayKVRefs.size() > 0) {
serailizeArrayIndexInformationAndSetInScan(context, arrayKVFuncs, arrayKVRefs);
KeyValueSchemaBuilder builder = new KeyValueSchemaBuilder(0);
for (Expression expression : arrayKVRefs) {
builder.addField(expression);
}
KeyValueSchema kvSchema = builder.build();
ValueBitSet arrayIndexesBitSet = ValueBitSet.newInstance(kvSchema);
builder = new KeyValueSchemaBuilder(0);
for (Expression expression : arrayKVFuncs) {
builder.addField(expression);
}
KeyValueSchema arrayIndexesSchema = builder.build();
Map<Expression, Expression> replacementMap = new HashMap<>();
for (int i = 0; i < arrayOldFuncs.size(); i++) {
Expression function = arrayKVFuncs.get(i);
replacementMap.put(arrayOldFuncs.get(i), new ArrayIndexExpression(i, function.getDataType(), arrayIndexesBitSet, arrayIndexesSchema));
}
ReplaceArrayFunctionExpressionVisitor visitor = new ReplaceArrayFunctionExpressionVisitor(replacementMap);
for (int i = 0; i < projectedColumns.size(); i++) {
ExpressionProjector projector = projectedColumns.get(i);
projectedColumns.set(i, new ExpressionProjector(projector.getName(), tableRef.getTableAlias() == null ? (table.getName() == null ? "" : table.getName().getString()) : tableRef.getTableAlias(), projector.getExpression().accept(visitor), projector.isCaseSensitive()));
}
}
// TODO make estimatedByteSize more accurate by counting the joined columns.
int estimatedKeySize = table.getRowKeySchema().getEstimatedValueLength();
int estimatedByteSize = 0;
for (Map.Entry<byte[], NavigableSet<byte[]>> entry : scan.getFamilyMap().entrySet()) {
PColumnFamily family = table.getColumnFamily(entry.getKey());
if (entry.getValue() == null) {
for (PColumn column : family.getColumns()) {
Integer maxLength = column.getMaxLength();
int byteSize = column.getDataType().isFixedWidth() ? maxLength == null ? column.getDataType().getByteSize() : maxLength : RowKeySchema.ESTIMATED_VARIABLE_LENGTH_SIZE;
estimatedByteSize += SizedUtil.KEY_VALUE_SIZE + estimatedKeySize + byteSize;
}
} else {
for (byte[] cq : entry.getValue()) {
//if (!Bytes.equals(cq, ByteUtil.EMPTY_BYTE_ARRAY) || cq.length > 0) {
PColumn column = family.getPColumnForColumnQualifier(cq);
Integer maxLength = column.getMaxLength();
int byteSize = column.getDataType().isFixedWidth() ? maxLength == null ? column.getDataType().getByteSize() : maxLength : RowKeySchema.ESTIMATED_VARIABLE_LENGTH_SIZE;
estimatedByteSize += SizedUtil.KEY_VALUE_SIZE + estimatedKeySize + byteSize;
}
//}
}
}
boolean isProjectEmptyKeyValue = false;
if (isWildcard) {
projectAllColumnFamilies(table, scan);
} else {
isProjectEmptyKeyValue = where == null || LiteralExpression.isTrue(where) || where.requiresFinalEvaluation();
for (byte[] family : projectedFamilies) {
projectColumnFamily(table, scan, family);
}
}
return new RowProjector(projectedColumns, estimatedByteSize, isProjectEmptyKeyValue, resolver.hasUDFs(), isWildcard);
}
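The KeyValueSchema built in the array-index branch above and its companion ValueBitSet are the mechanism every example on this page relies on: the schema describes the serialized fields and the bitset records which nullable fields actually carry a value. Below is a minimal, self-contained sketch of the write side only. It is not part of the Phoenix snippet above; the constant expressions are stand-ins for real projected expressions, and it assumes the LiteralExpression.newConstant(Object, PDataType) overload and the nested KeyValueSchema.KeyValueSchemaBuilder class.

import java.sql.SQLException;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.LiteralExpression;
import org.apache.phoenix.schema.KeyValueSchema;
import org.apache.phoenix.schema.KeyValueSchema.KeyValueSchemaBuilder;
import org.apache.phoenix.schema.ValueBitSet;
import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.schema.types.PVarchar;

public class ValueBitSetWriteSketch {
    public static void main(String[] args) throws SQLException {
        // Stand-in constants; ProjectionCompiler would add the projected array cell expressions here.
        Expression[] exprs = new Expression[] {
                LiteralExpression.newConstant(42, PInteger.INSTANCE),
                LiteralExpression.newConstant("hello", PVarchar.INSTANCE) };
        // Build a schema over the expressions, mirroring the KeyValueSchemaBuilder(0) usage above.
        KeyValueSchemaBuilder builder = new KeyValueSchemaBuilder(0);
        for (Expression e : exprs) {
            builder.addField(e);
        }
        KeyValueSchema schema = builder.build();
        // The bitset tracks which nullable fields end up with a value in the serialized byte[].
        ValueBitSet bitSet = ValueBitSet.newInstance(schema);
        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
        byte[] serialized = schema.toBytes(exprs, bitSet, ptr);
        System.out.println("serialized " + serialized.length + " bytes");
    }
}

The read side of the same handshake appears verbatim in decodeColumnValues further down.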
Use of org.apache.phoenix.schema.ValueBitSet in project phoenix by apache.
Class PhoenixRuntime, method encodeColumnValues:
/**
*
* @param conn connection that was used for reading/generating value.
* @param fullTableName fully qualified table name
* @param values values of the columns
* @param columns list of column pairs, with the column family as the first part and the column name as the second part.
* The column family is optional and hence nullable. Columns in the list have to be in the same order as the order of
* occurrence of their values in the object array.
* @return values encoded in a byte array
* @throws SQLException
* @see {@link #decodeValues(Connection, String, byte[], List)}
*/
public static byte[] encodeColumnValues(Connection conn, String fullTableName, Object[] values, List<Pair<String, String>> columns) throws SQLException {
PTable table = getTable(conn, fullTableName);
List<PColumn> pColumns = getColumns(table, columns);
List<Expression> expressions = new ArrayList<Expression>(pColumns.size());
int i = 0;
for (PColumn col : pColumns) {
Object value = values[i];
// for purposes of encoding, sort order of the columns doesn't matter.
Expression expr = LiteralExpression.newConstant(value, col.getDataType(), col.getMaxLength(), col.getScale());
expressions.add(expr);
i++;
}
KeyValueSchema kvSchema = buildKeyValueSchema(pColumns);
ImmutableBytesWritable ptr = new ImmutableBytesWritable();
ValueBitSet valueSet = ValueBitSet.newInstance(kvSchema);
return kvSchema.toBytes(expressions.toArray(new Expression[0]), valueSet, ptr);
}
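For reference, a hedged usage sketch of this method: the connection URL, table, and column names below are hypothetical, the value types must match the column types, the first element of each Pair is the (nullable) column family, and the Object[] must follow the same order as the column list.

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.phoenix.util.PhoenixRuntime;

public class EncodeColumnValuesExample {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost")) {
            // Hypothetical table T with a BIGINT column ID (no column family) and a VARCHAR column CF.NAME.
            List<Pair<String, String>> columns = Arrays.asList(
                    new Pair<String, String>(null, "ID"),
                    new Pair<String, String>("CF", "NAME"));
            Object[] values = new Object[] { 1L, "phoenix" }; // same order as the column list
            byte[] encoded = PhoenixRuntime.encodeColumnValues(conn, "T", values, columns);
            System.out.println("encoded " + encoded.length + " bytes");
        }
    }
}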
Use of org.apache.phoenix.schema.ValueBitSet in project phoenix by apache.
Class PhoenixRuntime, method decodeColumnValues:
/**
*
* @param conn connection that was used for reading/generating value.
* @param fullTableName fully qualified table name
* @param value values of the columns encoded as a single byte array. @see {@link #encodeColumnValues(Connection, String, Object[], List)}
* @param columns list of column names for the columns whose values are present in the byte array.
* The column names should be in the same order as their values appear in the byte array.
* Each column name includes the family name, if present, and the column name.
* @return decoded values for each column
* @throws SQLException
*
*/
public static Object[] decodeColumnValues(Connection conn, String fullTableName, byte[] value, List<Pair<String, String>> columns) throws SQLException {
PTable table = getTable(conn, fullTableName);
KeyValueSchema kvSchema = buildKeyValueSchema(getColumns(table, columns));
ImmutableBytesWritable ptr = new ImmutableBytesWritable(value);
ValueBitSet valueSet = ValueBitSet.newInstance(kvSchema);
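// Load the bitset that was serialized alongside the values so that next() below knows which nullable fields are present.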
valueSet.clear();
valueSet.or(ptr);
int maxOffset = ptr.getOffset() + ptr.getLength();
Boolean hasValue;
kvSchema.iterator(ptr);
int i = 0;
List<Object> values = new ArrayList<Object>();
while (hasValue = kvSchema.next(ptr, i, maxOffset, valueSet) != null) {
if (hasValue) {
values.add(kvSchema.getField(i).getDataType().toObject(ptr));
}
i++;
}
return values.toArray();
}
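The decoding direction is symmetric. As a hedged round-trip sketch with the same hypothetical table and columns as above, the byte array produced by encodeColumnValues is decoded with an identical column list:

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.phoenix.util.PhoenixRuntime;

public class DecodeColumnValuesExample {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost")) {
            List<Pair<String, String>> columns = Arrays.asList(
                    new Pair<String, String>(null, "ID"),
                    new Pair<String, String>("CF", "NAME"));
            byte[] encoded = PhoenixRuntime.encodeColumnValues(
                    conn, "T", new Object[] { 1L, "phoenix" }, columns);
            // The decode call must use the same columns, in the same order, as the encode call.
            Object[] decoded = PhoenixRuntime.decodeColumnValues(conn, "T", encoded, columns);
            System.out.println(Arrays.toString(decoded));
        }
    }
}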
Use of org.apache.phoenix.schema.ValueBitSet in project phoenix by apache.
Class RegionScannerFactory, method getWrappedScanner:
/**
* Return a wrapped scanner that catches unexpected exceptions (i.e. Phoenix bugs) and
* re-throws them as DoNotRetryIOException to prevent needless retrying from hanging the query
* for 30 seconds. Unfortunately, until HBASE-7481 gets fixed, there's no way to do
* the same from a custom filter.
* @param arrayKVRefs
* @param arrayFuncRefs
* @param offset starting position in the rowkey.
* @param scan
* @param tupleProjector
* @param dataRegion
* @param indexMaintainer
* @param tx current transaction
* @param viewConstants
*/
public RegionScanner getWrappedScanner(final RegionCoprocessorEnvironment env, final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, final int offset, final Scan scan, final ColumnReference[] dataColumns, final TupleProjector tupleProjector, final Region dataRegion, final IndexMaintainer indexMaintainer, Transaction tx, final byte[][] viewConstants, final KeyValueSchema kvSchema, final ValueBitSet kvSchemaBitSet, final TupleProjector projector, final ImmutableBytesWritable ptr, final boolean useQualifierAsListIndex) {
return new RegionScanner() {
private boolean hasReferences = checkForReferenceFiles();
private HRegionInfo regionInfo = env.getRegionInfo();
private byte[] actualStartKey = getActualStartKey();
// If there are reference files left after a local index region merge, in some cases we might
// get records whose row keys are less than the scan start row key. This happens when we replace
// the actual region start key with the merged region's start key. This method reports whether
// the region has any reference files.
private boolean checkForReferenceFiles() {
if (!ScanUtil.isLocalIndex(scan))
return false;
for (byte[] family : scan.getFamilies()) {
if (getRegion().getStore(family).hasReferences()) {
return true;
}
}
return false;
}
// Get the actual scan start row of the local index. When reference files are present, it is
// used to detect result rows whose row keys fall before the scan start row.
public byte[] getActualStartKey() {
return ScanUtil.isLocalIndex(scan) ? ScanUtil.getActualStartRow(scan, regionInfo) : null;
}
@Override
public boolean next(List<Cell> results) throws IOException {
try {
return s.next(results);
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
try {
return s.next(result, scannerContext);
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public void close() throws IOException {
s.close();
}
@Override
public HRegionInfo getRegionInfo() {
return s.getRegionInfo();
}
@Override
public boolean isFilterDone() throws IOException {
return s.isFilterDone();
}
@Override
public boolean reseek(byte[] row) throws IOException {
return s.reseek(row);
}
@Override
public long getMvccReadPoint() {
return s.getMvccReadPoint();
}
@Override
public boolean nextRaw(List<Cell> result) throws IOException {
try {
boolean next = s.nextRaw(result);
Cell arrayElementCell = null;
if (result.size() == 0) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
if (ScanUtil.isLocalIndex(scan) && !ScanUtil.isAnalyzeTable(scan)) {
if (hasReferences && actualStartKey != null) {
next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, null, arrayElementCell);
if (result.isEmpty()) {
return next;
}
}
/* In the following, c is only used when data region is null.
dataRegion will never be null in case of non-coprocessor call,
therefore no need to refactor
*/
IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
}
if (projector != null) {
Tuple toProject = useQualifierAsListIndex ? new PositionBasedResultTuple(result) : new ResultTuple(Result.create(result));
Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
result.clear();
result.add(tuple.getValue(0));
if (arrayElementCell != null) {
result.add(arrayElementCell);
}
}
// There is a scan attribute set to retrieve the specific array element
return next;
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws IOException {
try {
boolean next = s.nextRaw(result, scannerContext);
Cell arrayElementCell = null;
if (result.size() == 0) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
if ((offset > 0 || ScanUtil.isLocalIndex(scan)) && !ScanUtil.isAnalyzeTable(scan)) {
if (hasReferences && actualStartKey != null) {
next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, scannerContext, arrayElementCell);
if (result.isEmpty()) {
return next;
}
}
/* In the following, c is only used when data region is null.
dataRegion will never be null in case of non-coprocessor call,
therefore no need to refactor
*/
IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
}
if (projector != null) {
Tuple toProject = useQualifierAsListIndex ? new PositionBasedMultiKeyValueTuple(result) : new ResultTuple(Result.create(result));
Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
result.clear();
result.add(tuple.getValue(0));
if (arrayElementCell != null)
result.add(arrayElementCell);
}
// There is a scan attribute set to retrieve the specific array element
return next;
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
/**
* When there is a merge in progress while scanning local indexes, we might get key values whose row key is less than the scan start row.
* In that case we need to keep scanning until we reach a row key greater than or equal to the scan start key.
* TODO try to fix this case in LocalIndexStoreFileScanner when there is a merge.
*/
private boolean scanTillScanStartRow(final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result, ScannerContext scannerContext, Cell arrayElementCell) throws IOException {
boolean next = true;
Cell firstCell = result.get(0);
while (Bytes.compareTo(firstCell.getRowArray(), firstCell.getRowOffset(), firstCell.getRowLength(), actualStartKey, 0, actualStartKey.length) < 0) {
result.clear();
if (scannerContext == null) {
next = s.nextRaw(result);
} else {
next = s.nextRaw(result, scannerContext);
}
if (result.isEmpty()) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
firstCell = result.get(0);
}
return next;
}
private int replaceArrayIndexElement(final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result) {
// make a copy of the results array here, as we're modifying it below
MultiKeyValueTuple tuple = new MultiKeyValueTuple(ImmutableList.copyOf(result));
// The size of both the arrays would be same?
// Using KeyValueSchema to set and retrieve the value
// collect the first kv to get the row
Cell rowKv = result.get(0);
for (KeyValueColumnExpression kvExp : arrayKVRefs) {
if (kvExp.evaluate(tuple, ptr)) {
ListIterator<Cell> itr = result.listIterator();
while (itr.hasNext()) {
Cell kv = itr.next();
if (Bytes.equals(kvExp.getColumnFamily(), 0, kvExp.getColumnFamily().length, kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength()) && Bytes.equals(kvExp.getColumnQualifier(), 0, kvExp.getColumnQualifier().length, kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength())) {
// remove the kv that has the full array values.
itr.remove();
break;
}
}
}
}
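// Evaluate the array index functions against the row and pack their results into a single value
// using the KeyValueSchema/ValueBitSet pair handed to getWrappedScanner; this is the value the
// client-side ArrayIndexExpression later reads by position.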
byte[] value = kvSchema.toBytes(tuple, arrayFuncRefs, kvSchemaBitSet, ptr);
// Add a dummy kv with the exact value of the array index
result.add(new KeyValue(rowKv.getRowArray(), rowKv.getRowOffset(), rowKv.getRowLength(), QueryConstants.ARRAY_VALUE_COLUMN_FAMILY, 0, QueryConstants.ARRAY_VALUE_COLUMN_FAMILY.length, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER, 0, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER.length, HConstants.LATEST_TIMESTAMP, KeyValue.Type.codeToType(rowKv.getTypeByte()), value, 0, value.length));
return result.size() - 1;
}
@Override
public long getMaxResultSize() {
return s.getMaxResultSize();
}
@Override
public int getBatch() {
return s.getBatch();
}
};
}
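On the client side, the ArrayIndexExpression created by ProjectionCompiler reads its value back out of that synthetic ARRAY_VALUE cell by position. The helper below is a simplified sketch of that positional read, not the actual ArrayIndexExpression code; it uses only the schema-iteration calls already shown in decodeColumnValues above and assumes the serialized byte[] was produced by the matching KeyValueSchema.

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.schema.KeyValueSchema;
import org.apache.phoenix.schema.ValueBitSet;

public class PositionalReadSketch {
    // Returns the decoded value at the given field position, or null if that field is absent.
    static Object readField(KeyValueSchema schema, byte[] serialized, int position) {
        ImmutableBytesWritable ptr = new ImmutableBytesWritable(serialized);
        ValueBitSet bitSet = ValueBitSet.newInstance(schema);
        bitSet.clear();
        bitSet.or(ptr); // load which nullable fields are present in the serialized value
        int maxOffset = ptr.getOffset() + ptr.getLength();
        schema.iterator(ptr); // position ptr at the start of the first field
        for (int i = 0; i <= position; i++) {
            Boolean hasValue = schema.next(ptr, i, maxOffset, bitSet);
            if (hasValue == null) {
                return null; // ran past the end of the serialized value
            }
            if (i == position) {
                return hasValue ? schema.getField(i).getDataType().toObject(ptr) : null;
            }
        }
        return null; // not reached
    }
}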
Use of org.apache.phoenix.schema.ValueBitSet in project phoenix by apache.
Class PhoenixRuntime, method encodeValues:
/**
*
* @param conn connection that was used for reading/generating value.
* @param fullTableName fully qualified table name
* @param values values of the columns
* @param columns list of column pairs, with the column family as the first part and the column name as the second part.
* The column family is optional and hence nullable. Columns in the list have to be in the same order as the order of
* occurrence of their values in the object array.
* @return values encoded in a byte array
* @throws SQLException
* @see {@link #decodeValues(Connection, String, byte[], List)}
*/
@Deprecated
public static byte[] encodeValues(Connection conn, String fullTableName, Object[] values, List<Pair<String, String>> columns) throws SQLException {
PTable table = getTable(conn, fullTableName);
List<PColumn> pColumns = getPColumns(table, columns);
List<Expression> expressions = new ArrayList<Expression>(pColumns.size());
int i = 0;
for (PColumn col : pColumns) {
Object value = values[i];
// for purposes of encoding, sort order of the columns doesn't matter.
Expression expr = LiteralExpression.newConstant(value, col.getDataType(), col.getMaxLength(), col.getScale());
expressions.add(expr);
i++;
}
KeyValueSchema kvSchema = buildKeyValueSchema(pColumns);
ImmutableBytesWritable ptr = new ImmutableBytesWritable();
ValueBitSet valueSet = ValueBitSet.newInstance(kvSchema);
return kvSchema.toBytes(expressions.toArray(new Expression[0]), valueSet, ptr);
}
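Note that encodeValues is annotated @Deprecated. The non-deprecated encodeColumnValues shown earlier takes the same parameter list, so for the call pattern above it should be a drop-in replacement: PhoenixRuntime.encodeColumnValues(conn, fullTableName, values, columns) instead of PhoenixRuntime.encodeValues(conn, fullTableName, values, columns).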