Use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
The class HiveParserCalcitePlanner, method genUDTFPlan.
private RelNode genUDTFPlan(SqlOperator sqlOperator, String genericUDTFName, String outputTableAlias, List<String> colAliases, HiveParserQB qb, List<RexNode> operands, List<ColumnInfo> opColInfos, RelNode input, boolean inSelect, boolean isOuter) throws SemanticException {
Preconditions.checkState(!isOuter || !inSelect, "OUTER is not supported for SELECT UDTF");
// No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
HiveParserQBParseInfo qbp = qb.getParseInfo();
if (inSelect && !qbp.getDestToGroupBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
}
if (inSelect && !qbp.getDestToDistributeBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
}
if (inSelect && !qbp.getDestToSortBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
}
if (inSelect && !qbp.getDestToClusterBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
}
if (inSelect && !qbp.getAliasToLateralViews().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
}
LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
// Create the object inspector for the input columns and initialize the UDTF
RelDataType relDataType = HiveParserUtils.inferReturnTypeForOperands(sqlOperator, operands, cluster.getTypeFactory());
DataType dataType = HiveParserUtils.toDataType(relDataType);
StructObjectInspector outputOI = (StructObjectInspector) HiveInspectors.getObjectInspector(HiveTypeUtil.toHiveTypeInfo(dataType, false));
// This should only happen for a SELECT UDTF.
if (outputTableAlias == null) {
Preconditions.checkState(inSelect, "Table alias not specified for lateral view");
String prefix = "select_" + genericUDTFName + "_alias_";
int i = 0;
while (qb.getAliases().contains(prefix + i)) {
i++;
}
outputTableAlias = prefix + i;
}
if (colAliases.isEmpty()) {
// user did not specify alias names, infer names from outputOI
for (StructField field : outputOI.getAllStructFieldRefs()) {
colAliases.add(field.getFieldName());
}
}
// Make sure that the number of column aliases in the AS clause matches the number of
// columns output by the UDTF
int numOutputCols = outputOI.getAllStructFieldRefs().size();
int numSuppliedAliases = colAliases.size();
if (numOutputCols != numSuppliedAliases) {
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numOutputCols + " aliases " + "but got " + numSuppliedAliases));
}
// Generate the output column infos / row resolver using internal names.
ArrayList<ColumnInfo> udtfOutputCols = new ArrayList<>();
Iterator<String> colAliasesIter = colAliases.iterator();
for (StructField sf : outputOI.getAllStructFieldRefs()) {
String colAlias = colAliasesIter.next();
assert (colAlias != null);
// Since the UDTF operator feeds into an LVJ operator that will rename all the internal names,
// we can just use the field name from the UDTF's OI as the internal name.
ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
udtfOutputCols.add(col);
}
// Create the row resolver for the table function scan
HiveParserRowResolver udtfOutRR = new HiveParserRowResolver();
for (int i = 0; i < udtfOutputCols.size(); i++) {
udtfOutRR.put(outputTableAlias, colAliases.get(i), udtfOutputCols.get(i));
}
// Build row type from field <type, name>
RelDataType retType = HiveParserTypeConverter.getType(cluster, udtfOutRR, null);
List<RelDataType> argTypes = new ArrayList<>();
RelDataTypeFactory dtFactory = cluster.getRexBuilder().getTypeFactory();
for (ColumnInfo ci : opColInfos) {
argTypes.add(HiveParserUtils.toRelDataType(ci.getType(), dtFactory));
}
SqlOperator calciteOp = HiveParserSqlFunctionConverter.getCalciteFn(genericUDTFName, argTypes, retType, false);
RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, operands);
// convert the rex call
TableFunctionConverter udtfConverter = new TableFunctionConverter(cluster, input, frameworkConfig.getOperatorTable(), catalogReader.nameMatcher());
RexCall convertedCall = (RexCall) rexNode.accept(udtfConverter);
SqlOperator convertedOperator = convertedCall.getOperator();
Preconditions.checkState(convertedOperator instanceof SqlUserDefinedTableFunction, "Expect operator to be " + SqlUserDefinedTableFunction.class.getSimpleName() + ", actually got " + convertedOperator.getClass().getSimpleName());
// TODO: how to decide this?
Type elementType = Object[].class;
// create LogicalTableFunctionScan
RelNode tableFunctionScan = LogicalTableFunctionScan.create(input.getCluster(), Collections.emptyList(), convertedCall, elementType, retType, null);
// remember the table alias for the UDTF so that we can reference the cols later
qb.addAlias(outputTableAlias);
RelNode correlRel;
RexBuilder rexBuilder = cluster.getRexBuilder();
// find correlation in the converted call
Pair<List<CorrelationId>, ImmutableBitSet> correlUse = getCorrelationUse(convertedCall);
// create correlate node
if (correlUse == null) {
correlRel = plannerContext.createRelBuilder(catalogManager.getCurrentCatalog(), catalogManager.getCurrentDatabase()).push(input).push(tableFunctionScan).join(isOuter ? JoinRelType.LEFT : JoinRelType.INNER, rexBuilder.makeLiteral(true)).build();
} else {
if (correlUse.left.size() > 1) {
tableFunctionScan = DeduplicateCorrelateVariables.go(rexBuilder, correlUse.left.get(0), Util.skip(correlUse.left), tableFunctionScan);
}
correlRel = LogicalCorrelate.create(input, tableFunctionScan, correlUse.left.get(0), correlUse.right, isOuter ? JoinRelType.LEFT : JoinRelType.INNER);
}
// Add new rel & its RR to the maps
relToHiveColNameCalcitePosMap.put(tableFunctionScan, buildHiveToCalciteColumnMap(udtfOutRR));
relToRowResolver.put(tableFunctionScan, udtfOutRR);
HiveParserRowResolver correlRR = HiveParserRowResolver.getCombinedRR(relToRowResolver.get(input), relToRowResolver.get(tableFunctionScan));
relToHiveColNameCalcitePosMap.put(correlRel, buildHiveToCalciteColumnMap(correlRR));
relToRowResolver.put(correlRel, correlRR);
if (!inSelect) {
return correlRel;
}
// create project node
List<RexNode> projects = new ArrayList<>();
HiveParserRowResolver projectRR = new HiveParserRowResolver();
int j = 0;
for (int i = input.getRowType().getFieldCount(); i < correlRel.getRowType().getFieldCount(); i++) {
projects.add(cluster.getRexBuilder().makeInputRef(correlRel, i));
ColumnInfo inputColInfo = correlRR.getRowSchema().getSignature().get(i);
String colAlias = inputColInfo.getAlias();
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(j++), inputColInfo.getObjectInspector(), null, false);
projectRR.put(null, colAlias, colInfo);
}
RelNode projectNode = LogicalProject.create(correlRel, Collections.emptyList(), projects, tableFunctionScan.getRowType());
relToHiveColNameCalcitePosMap.put(projectNode, buildHiveToCalciteColumnMap(projectRR));
relToRowResolver.put(projectNode, projectRR);
return projectNode;
}
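To make the RelDataTypeFactory usage in genUDTFPlan easier to follow in isolation, here is a minimal, self-contained sketch of obtaining a factory and building argument and return types for a table function call. It uses a stand-alone SqlTypeFactoryImpl rather than the cluster's factory, and the UDTF signature (VARCHAR, INT) -> ROW(col1 VARCHAR) is made up for illustration.

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

import java.util.Arrays;
import java.util.List;

public class UdtfArgTypeSketch {
    public static void main(String[] args) {
        // Stand-alone factory; the planner obtains its factory from the RelOptCluster instead.
        RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // Argument types for a hypothetical UDTF taking (VARCHAR, INT).
        List<RelDataType> argTypes = Arrays.asList(
                typeFactory.createSqlType(SqlTypeName.VARCHAR, 100),
                typeFactory.createSqlType(SqlTypeName.INTEGER));

        // Row type returned by the UDTF: one VARCHAR column named "col1".
        RelDataType retType = typeFactory.builder()
                .add("col1", typeFactory.createSqlType(SqlTypeName.VARCHAR, 100))
                .build();

        System.out.println("arg types: " + argTypes + ", return type: " + retType);
    }
}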
Use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
The class HiveParserDMLHelper, method createInsertOperationInfo.
public Tuple4<ObjectIdentifier, QueryOperation, Map<String, String>, Boolean> createInsertOperationInfo(RelNode queryRelNode, Table destTable, Map<String, String> staticPartSpec, List<String> destSchema, boolean overwrite) throws SemanticException {
// sanity check
Preconditions.checkArgument(queryRelNode instanceof Project || queryRelNode instanceof Sort || queryRelNode instanceof LogicalDistribution, "Expect top RelNode to be Project, Sort, or LogicalDistribution, actually got " + queryRelNode);
if (!(queryRelNode instanceof Project)) {
RelNode parent = ((SingleRel) queryRelNode).getInput();
// SEL + SORT or SEL + DIST + LIMIT
Preconditions.checkArgument(parent instanceof Project || parent instanceof LogicalDistribution, "Expect input to be a Project or LogicalDistribution, actually got " + parent);
if (parent instanceof LogicalDistribution) {
RelNode grandParent = ((LogicalDistribution) parent).getInput();
Preconditions.checkArgument(grandParent instanceof Project, "Expect input of LogicalDistribution to be a Project, actually got " + grandParent);
}
}
// handle dest schema, e.g. insert into dest(.,.,.) select ...
queryRelNode = handleDestSchema((SingleRel) queryRelNode, destTable, destSchema, staticPartSpec.keySet());
// track each target col and its expected type
RelDataTypeFactory typeFactory = plannerContext.getTypeFactory();
LinkedHashMap<String, RelDataType> targetColToCalcType = new LinkedHashMap<>();
List<TypeInfo> targetHiveTypes = new ArrayList<>();
List<FieldSchema> allCols = new ArrayList<>(destTable.getCols());
allCols.addAll(destTable.getPartCols());
for (FieldSchema col : allCols) {
TypeInfo hiveType = TypeInfoUtils.getTypeInfoFromTypeString(col.getType());
targetHiveTypes.add(hiveType);
targetColToCalcType.put(col.getName(), HiveParserTypeConverter.convert(hiveType, typeFactory));
}
// add static partitions to query source
if (!staticPartSpec.isEmpty()) {
if (queryRelNode instanceof Project) {
queryRelNode = replaceProjectForStaticPart((Project) queryRelNode, staticPartSpec, destTable, targetColToCalcType);
} else if (queryRelNode instanceof Sort) {
Sort sort = (Sort) queryRelNode;
RelNode oldInput = sort.getInput();
RelNode newInput;
if (oldInput instanceof LogicalDistribution) {
newInput = replaceDistForStaticParts((LogicalDistribution) oldInput, destTable, staticPartSpec, targetColToCalcType);
} else {
newInput = replaceProjectForStaticPart((Project) oldInput, staticPartSpec, destTable, targetColToCalcType);
// we may need to shift the field collations
final int numDynmPart = destTable.getTTable().getPartitionKeys().size() - staticPartSpec.size();
if (!sort.getCollation().getFieldCollations().isEmpty() && numDynmPart > 0) {
sort.replaceInput(0, null);
sort = LogicalSort.create(newInput, shiftRelCollation(sort.getCollation(), (Project) oldInput, staticPartSpec.size(), numDynmPart), sort.offset, sort.fetch);
}
}
sort.replaceInput(0, newInput);
queryRelNode = sort;
} else {
queryRelNode = replaceDistForStaticParts((LogicalDistribution) queryRelNode, destTable, staticPartSpec, targetColToCalcType);
}
}
// add type conversions
queryRelNode = addTypeConversions(plannerContext.getCluster().getRexBuilder(), queryRelNode, new ArrayList<>(targetColToCalcType.values()), targetHiveTypes, funcConverter);
// create identifier
List<String> targetTablePath = Arrays.asList(destTable.getDbName(), destTable.getTableName());
UnresolvedIdentifier unresolvedIdentifier = UnresolvedIdentifier.of(targetTablePath);
ObjectIdentifier identifier = catalogManager.qualifyIdentifier(unresolvedIdentifier);
return Tuple4.of(identifier, new PlannerQueryOperation(queryRelNode), staticPartSpec, overwrite);
}
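The core bookkeeping above is the ordered map from each target column to its Calcite type, built with the planner's RelDataTypeFactory. Below is a hedged sketch of that step with a stand-alone factory and invented column names; the real code derives the types from the Hive table schema via HiveParserTypeConverter.convert.

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

import java.util.LinkedHashMap;
import java.util.Map;

public class TargetColTypeSketch {
    public static void main(String[] args) {
        RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // Insertion order matters: it must follow the destination table's column order
        // (data columns first, then partition columns), as in the method above.
        Map<String, RelDataType> targetColToCalcType = new LinkedHashMap<>();
        targetColToCalcType.put("user_id", typeFactory.createSqlType(SqlTypeName.BIGINT));
        targetColToCalcType.put("name", typeFactory.createSqlType(SqlTypeName.VARCHAR, 256));
        // Hypothetical partition column appended last.
        targetColToCalcType.put("dt", typeFactory.createSqlType(SqlTypeName.VARCHAR, 10));

        targetColToCalcType.forEach((col, type) -> System.out.println(col + " -> " + type));
    }
}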
Use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
The class HiveParserRexNodeConverter, method convertConstant.
public static RexNode convertConstant(ExprNodeConstantDesc literal, RelOptCluster cluster) throws SemanticException {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
RelDataType calciteDataType = HiveParserTypeConverter.convert(hiveType, dtFactory);
PrimitiveObjectInspector.PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
ConstantObjectInspector coi = literal.getWritableObjectInspector();
Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), coi);
RexNode calciteLiteral;
HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
// If value is null, the type should also be VOID.
if (value == null) {
hiveTypeCategory = PrimitiveObjectInspector.PrimitiveCategory.VOID;
}
// TODO: Verify if we need to use ConstantObjectInspector to unwrap data
switch(hiveTypeCategory) {
case BOOLEAN:
calciteLiteral = rexBuilder.makeLiteral((Boolean) value);
break;
case BYTE:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
break;
case SHORT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
break;
case INT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
break;
case LONG:
calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
break;
// TODO: is Decimal an exact numeric or approximate numeric?
case DECIMAL:
if (value instanceof HiveDecimal) {
value = ((HiveDecimal) value).bigDecimalValue();
} else if (value instanceof Decimal128) {
value = ((Decimal128) value).toBigDecimal();
}
if (value == null) {
// For now, we will not run CBO in the presence of invalid decimal literals.
throw new SemanticException("Expression " + literal.getExprString() + " is not a valid decimal");
// TODO: return createNullLiteral(literal);
}
BigDecimal bd = (BigDecimal) value;
BigInteger unscaled = bd.unscaledValue();
if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
calciteLiteral = rexBuilder.makeExactLiteral(bd);
} else {
// CBO doesn't support unlimited-precision decimals. In practice, this will work...
// An alternative would be to throw CboSemanticException and fall back to no CBO.
RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, unscaled.toString().length(), bd.scale());
calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
}
break;
case FLOAT:
calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Float.toString((Float) value)), calciteDataType);
break;
case DOUBLE:
// TODO: The best solution is to support NaN in expression reduction.
if (Double.isNaN((Double) value)) {
throw new SemanticException("NaN");
}
calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Double.toString((Double) value)), calciteDataType);
break;
case CHAR:
if (value instanceof HiveChar) {
value = ((HiveChar) value).getValue();
}
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case VARCHAR:
if (value instanceof HiveVarchar) {
value = ((HiveVarchar) value).getValue();
}
calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
break;
case STRING:
Object constantDescVal = literal.getValue();
constantDescVal = constantDescVal instanceof NlsString ? constantDescVal : asUnicodeString((String) value);
// Calcite treats string literals as CHAR type; we treat them as STRING, just like Hive.
RelDataType type = HiveParserTypeConverter.convert(hiveType, dtFactory);
// if we get here, the value is not null
type = dtFactory.createTypeWithNullability(type, false);
calciteLiteral = rexBuilder.makeLiteral(constantDescVal, type, true);
break;
case DATE:
LocalDate localDate = HiveParserUtils.getSessionHiveShim().toFlinkDate(value);
DateString dateString = new DateString(localDate.getYear(), localDate.getMonthValue(), localDate.getDayOfMonth());
calciteLiteral = rexBuilder.makeDateLiteral(dateString);
break;
case TIMESTAMP:
TimestampString timestampString;
if (value instanceof Calendar) {
timestampString = TimestampString.fromCalendarFields((Calendar) value);
} else {
LocalDateTime localDateTime = HiveParserUtils.getSessionHiveShim().toFlinkTimestamp(value);
timestampString = new TimestampString(localDateTime.getYear(), localDateTime.getMonthValue(), localDateTime.getDayOfMonth(), localDateTime.getHour(), localDateTime.getMinute(), localDateTime.getSecond());
timestampString = timestampString.withNanos(localDateTime.getNano());
}
// Hive always treats timestamps as having precision 9.
calciteLiteral = rexBuilder.makeTimestampLiteral(timestampString, 9);
break;
case VOID:
calciteLiteral = cluster.getRexBuilder().makeLiteral(null, dtFactory.createSqlType(SqlTypeName.NULL), true);
break;
case BINARY:
case UNKNOWN:
default:
if (hiveShim.isIntervalYearMonthType(hiveTypeCategory)) {
// Calcite year-month literal value is months as BigDecimal
BigDecimal totalMonths = BigDecimal.valueOf(((HiveParserIntervalYearMonth) value).getTotalMonths());
calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths, new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
} else if (hiveShim.isIntervalDayTimeType(hiveTypeCategory)) {
// Calcite day-time interval is millis value as BigDecimal
// Seconds converted to millis
BigDecimal secsValueBd = BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getTotalSeconds() * 1000);
// Nanos converted to millis
BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getNanos(), 6);
calciteLiteral = rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new SqlParserPos(1, 1)));
} else {
throw new RuntimeException("UnSupported Literal type " + hiveTypeCategory);
}
}
return calciteLiteral;
}
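The switch above ultimately funnels every constant through a RexBuilder backed by the cluster's RelDataTypeFactory. The following stand-alone sketch shows the same literal-building calls for a few representative branches (exact decimal, approximate double, typed NULL); the values and target types are illustrative only, and the typed NULL uses makeNullLiteral, which is equivalent to the makeLiteral call in the VOID branch.

import java.math.BigDecimal;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

public class LiteralSketch {
    public static void main(String[] args) {
        RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
        RexBuilder rexBuilder = new RexBuilder(typeFactory);

        // Exact numeric literal with an explicit DECIMAL(10, 2) target type.
        RelDataType decimalType = typeFactory.createSqlType(SqlTypeName.DECIMAL, 10, 2);
        RexNode price = rexBuilder.makeExactLiteral(new BigDecimal("12.34"), decimalType);

        // Approximate numeric (DOUBLE) literal, analogous to the DOUBLE branch above.
        RelDataType doubleType = typeFactory.createSqlType(SqlTypeName.DOUBLE);
        RexNode ratio = rexBuilder.makeApproxLiteral(new BigDecimal("0.75"), doubleType);

        // Typed NULL literal, analogous to the VOID branch above.
        RexNode nullLit = rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.NULL));

        System.out.println(price + ", " + ratio + ", " + nullLit);
    }
}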
Use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
The class HiveParserTypeConverter, method getType.
public static RelDataType getType(RelOptCluster cluster, HiveParserRowResolver rr, List<String> neededCols) throws SemanticException {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
RowSchema rs = rr.getRowSchema();
List<RelDataType> fieldTypes = new LinkedList<>();
List<String> fieldNames = new LinkedList<>();
for (ColumnInfo ci : rs.getSignature()) {
if (neededCols == null || neededCols.contains(ci.getInternalName())) {
fieldTypes.add(convert(ci.getType(), dtFactory));
fieldNames.add(ci.getInternalName());
}
}
return dtFactory.createStructType(fieldTypes, fieldNames);
}
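getType reduces to a single createStructType call once the field names and types have been collected. Here is a self-contained sketch of that call, with hard-coded fields standing in for the HiveParserRowResolver signature:

import java.util.Arrays;
import java.util.List;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

public class StructTypeSketch {
    public static void main(String[] args) {
        RelDataTypeFactory dtFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        List<RelDataType> fieldTypes = Arrays.asList(
                dtFactory.createSqlType(SqlTypeName.INTEGER),
                dtFactory.createSqlType(SqlTypeName.VARCHAR, 64));
        List<String> fieldNames = Arrays.asList("_col0", "_col1");

        // Row type with fields (_col0 INTEGER, _col1 VARCHAR(64)).
        RelDataType rowType = dtFactory.createStructType(fieldTypes, fieldNames);
        System.out.println(rowType.getFullTypeString());
    }
}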
Use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
The class SqlItemOperator, method inferReturnType.
@Override
public RelDataType inferReturnType(SqlOperatorBinding opBinding) {
final RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
final RelDataType operandType = opBinding.getOperandType(0);
switch(operandType.getSqlTypeName()) {
case ARRAY:
return typeFactory.createTypeWithNullability(operandType.getComponentType(), true);
case MAP:
return typeFactory.createTypeWithNullability(operandType.getValueType(), true);
case ROW:
String fieldName = opBinding.getOperandLiteralValue(1, String.class);
RelDataTypeField field = operandType.getField(fieldName, false, false);
if (field == null) {
throw new AssertionError("Cannot infer type of field '" + fieldName + "' within ROW type: " + operandType);
} else {
RelDataType fieldType = field.getType();
if (operandType.isNullable()) {
fieldType = typeFactory.createTypeWithNullability(fieldType, true);
}
return fieldType;
}
case ANY:
case DYNAMIC_STAR:
return typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.ANY), true);
default:
throw new AssertionError();
}
}
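For the ARRAY branch, the effect of createTypeWithNullability is that indexing always yields a nullable element type (for example, an out-of-range index produces NULL). Below is a stand-alone illustration with an invented ARRAY<INTEGER> operand type:

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

public class ItemTypeSketch {
    public static void main(String[] args) {
        RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // ARRAY<INTEGER> (factory-created types are NOT NULL by default).
        RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);
        RelDataType arrayType = typeFactory.createArrayType(intType, -1);

        // What the ARRAY branch of inferReturnType computes: the nullable component type.
        RelDataType itemType =
                typeFactory.createTypeWithNullability(arrayType.getComponentType(), true);

        System.out.println(itemType.isNullable()); // true
    }
}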