use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.
the class HiveParserCalcitePlanner method genSetOpLogicalPlan.
@SuppressWarnings("nls")
private RelNode genSetOpLogicalPlan(HiveParserQBExpr.Opcode opcode, String alias, String leftalias,
        RelNode leftRel, String rightalias, RelNode rightRel) throws SemanticException {
    // 1. Get Row Resolvers, Column map for original left and right input of SetOp Rel
    HiveParserRowResolver leftRR = relToRowResolver.get(leftRel);
    HiveParserRowResolver rightRR = relToRowResolver.get(rightRel);
    HashMap<String, ColumnInfo> leftMap = leftRR.getFieldMap(leftalias);
    HashMap<String, ColumnInfo> rightMap = rightRR.getFieldMap(rightalias);
    // 2. Validate that SetOp is feasible according to Hive (by using type info from RR)
    if (leftMap.size() != rightMap.size()) {
        throw new SemanticException("Schema of both sides of union should match.");
    }
    // 3. Construct SetOp output RR using the original left & right inputs
    HiveParserRowResolver setOpOutRR = new HiveParserRowResolver();
    Iterator<Map.Entry<String, ColumnInfo>> lIter = leftMap.entrySet().iterator();
    Iterator<Map.Entry<String, ColumnInfo>> rIter = rightMap.entrySet().iterator();
    while (lIter.hasNext()) {
        Map.Entry<String, ColumnInfo> lEntry = lIter.next();
        Map.Entry<String, ColumnInfo> rEntry = rIter.next();
        ColumnInfo lInfo = lEntry.getValue();
        ColumnInfo rInfo = rEntry.getValue();
        String field = lEntry.getKey();
        // try widening conversion, otherwise fail union
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
        if (commonTypeInfo == null) {
            HiveParserASTNode tabRef = getQB().getAliases().isEmpty() ? null
                    : getQB().getParseInfo().getSrcForAlias(getQB().getAliases().get(0));
            throw new SemanticException(generateErrorMessage(tabRef,
                    "Schema of both sides of setop should match: Column " + field
                            + " is of type " + lInfo.getType().getTypeName()
                            + " on first table and type " + rInfo.getType().getTypeName()
                            + " on second table"));
        }
        ColumnInfo setOpColInfo = new ColumnInfo(lInfo);
        setOpColInfo.setType(commonTypeInfo);
        setOpOutRR.put(alias, field, setOpColInfo);
    }
    // 4. Determine which columns require a cast on the left/right input (Calcite requires
    // exact types on both sides of a SetOp)
    boolean leftNeedsTypeCast = false;
    boolean rightNeedsTypeCast = false;
    List<RexNode> leftProjs = new ArrayList<>();
    List<RexNode> rightProjs = new ArrayList<>();
    List<RelDataTypeField> leftFields = leftRel.getRowType().getFieldList();
    List<RelDataTypeField> rightFields = rightRel.getRowType().getFieldList();
    for (int i = 0; i < leftFields.size(); i++) {
        RelDataType leftFieldType = leftFields.get(i).getType();
        RelDataType rightFieldType = rightFields.get(i).getType();
        if (!leftFieldType.equals(rightFieldType)) {
            RelDataType unionFieldType = HiveParserUtils.toRelDataType(
                    setOpOutRR.getColumnInfos().get(i).getType(), cluster.getTypeFactory());
            if (!unionFieldType.equals(leftFieldType)) {
                leftNeedsTypeCast = true;
            }
            leftProjs.add(cluster.getRexBuilder().ensureType(
                    unionFieldType, cluster.getRexBuilder().makeInputRef(leftFieldType, i), true));
            if (!unionFieldType.equals(rightFieldType)) {
                rightNeedsTypeCast = true;
            }
            rightProjs.add(cluster.getRexBuilder().ensureType(
                    unionFieldType, cluster.getRexBuilder().makeInputRef(rightFieldType, i), true));
        } else {
            leftProjs.add(cluster.getRexBuilder().ensureType(
                    leftFieldType, cluster.getRexBuilder().makeInputRef(leftFieldType, i), true));
            rightProjs.add(cluster.getRexBuilder().ensureType(
                    rightFieldType, cluster.getRexBuilder().makeInputRef(rightFieldType, i), true));
        }
    }
    // 5. Introduce a Project above the original left/right input if casts are needed for type parity
    if (leftNeedsTypeCast) {
        leftRel = LogicalProject.create(leftRel, Collections.emptyList(), leftProjs,
                leftRel.getRowType().getFieldNames());
    }
    if (rightNeedsTypeCast) {
        rightRel = LogicalProject.create(rightRel, Collections.emptyList(), rightProjs,
                rightRel.getRowType().getFieldNames());
    }
    // 6. Construct SetOp Rel
    List<RelNode> leftAndRight = Arrays.asList(leftRel, rightRel);
    SetOp setOpRel;
    switch (opcode) {
        case UNION:
            setOpRel = LogicalUnion.create(leftAndRight, true);
            break;
        case INTERSECT:
            setOpRel = LogicalIntersect.create(leftAndRight, false);
            break;
        case INTERSECTALL:
            setOpRel = LogicalIntersect.create(leftAndRight, true);
            break;
        case EXCEPT:
            setOpRel = LogicalMinus.create(leftAndRight, false);
            break;
        case EXCEPTALL:
            setOpRel = LogicalMinus.create(leftAndRight, true);
            break;
        default:
            throw new SemanticException("Unsupported set operator " + opcode.toString());
    }
    relToRowResolver.put(setOpRel, setOpOutRR);
    relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
    return setOpRel;
}
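The feasibility check above hinges on FunctionRegistry.getCommonClassForUnionAll returning a widened common type, or null when no common type exists. A minimal standalone sketch of that behavior, run outside the planner; the class name is hypothetical and the printed results are expectations, not guaranteed output:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionTypeCheckSketch {
    public static void main(String[] args) {
        // int and double share a widened type, so this column pair is feasible for a set op
        TypeInfo widened = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
        System.out.println(widened); // expected: double

        // a map and an int have no common type, the case that triggers the SemanticException above
        TypeInfo incompatible = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo),
                TypeInfoFactory.intTypeInfo);
        System.out.println(incompatible); // expected: null
    }
}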
use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.
the class HiveParserCalcitePlanner method genOBLogicalPlan.
private Pair<Sort, RelNode> genOBLogicalPlan(HiveParserQB qb, RelNode srcRel, boolean outermostOB)
        throws SemanticException {
    Sort sortRel = null;
    RelNode originalOBInput = null;
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    String dest = qbp.getClauseNames().iterator().next();
    HiveParserASTNode obAST = qbp.getOrderByForClause(dest);
    if (obAST != null) {
        // 1. OB Expr sanity test
        // in strict mode, in the presence of order by, limit must be specified
        Integer limit = qb.getParseInfo().getDestLimit(dest);
        if (limit == null) {
            String mapRedMode = semanticAnalyzer.getConf().getVar(HiveConf.ConfVars.HIVEMAPREDMODE);
            boolean banLargeQuery = Boolean.parseBoolean(
                    semanticAnalyzer.getConf().get("hive.strict.checks.large.query", "false"));
            if ("strict".equalsIgnoreCase(mapRedMode) || banLargeQuery) {
                throw new SemanticException(generateErrorMessage(obAST, "Order by-s without limit"));
            }
        }
        // 2. Walk through OB exprs and extract field collations and additional
        // virtual columns needed
        final List<RexNode> virtualCols = new ArrayList<>();
        final List<RelFieldCollation> fieldCollations = new ArrayList<>();
        int fieldIndex;
        List<Node> obASTExprLst = obAST.getChildren();
        HiveParserASTNode obASTExpr;
        HiveParserASTNode nullOrderASTExpr;
        List<Pair<HiveParserASTNode, TypeInfo>> vcASTAndType = new ArrayList<>();
        HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
        HiveParserRowResolver outputRR = new HiveParserRowResolver();
        HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(),
                relToHiveColNameCalcitePosMap.get(srcRel), 0, false, funcConverter);
        int numSrcFields = srcRel.getRowType().getFieldCount();
        for (Node node : obASTExprLst) {
            // 2.1 Convert AST Expr to ExprNode
            obASTExpr = (HiveParserASTNode) node;
            nullOrderASTExpr = (HiveParserASTNode) obASTExpr.getChild(0);
            HiveParserASTNode ref = (HiveParserASTNode) nullOrderASTExpr.getChild(0);
            Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc =
                    semanticAnalyzer.genAllExprNodeDesc(ref, inputRR);
            ExprNodeDesc obExprNodeDesc = astToExprNodeDesc.get(ref);
            if (obExprNodeDesc == null) {
                throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
            }
            // 2.2 Convert ExprNode to RexNode
            RexNode rexNode = converter.convert(obExprNodeDesc).accept(funcConverter);
            // 2.3 Determine the index of the OB expr in the child schema; a compound expr must be
            // present in the child (& hence we add a child Project Rel)
            if (rexNode instanceof RexInputRef) {
                fieldIndex = ((RexInputRef) rexNode).getIndex();
            } else {
                fieldIndex = numSrcFields + virtualCols.size();
                virtualCols.add(rexNode);
                vcASTAndType.add(new Pair<>(ref, obExprNodeDesc.getTypeInfo()));
            }
            // 2.4 Determine the Direction of order by
            RelFieldCollation.Direction direction = RelFieldCollation.Direction.DESCENDING;
            if (obASTExpr.getType() == HiveASTParser.TOK_TABSORTCOLNAMEASC) {
                direction = RelFieldCollation.Direction.ASCENDING;
            }
            RelFieldCollation.NullDirection nullOrder;
            if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_FIRST) {
                nullOrder = RelFieldCollation.NullDirection.FIRST;
            } else if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_LAST) {
                nullOrder = RelFieldCollation.NullDirection.LAST;
            } else {
                throw new SemanticException("Unexpected null ordering option: " + nullOrderASTExpr.getType());
            }
            // 2.5 Add to field collations
            fieldCollations.add(new RelFieldCollation(fieldIndex, direction, nullOrder));
        }
        // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
        // for top constraining Sel
        RelNode obInputRel = srcRel;
        if (!virtualCols.isEmpty()) {
            List<RexNode> originalInputRefs = srcRel.getRowType().getFieldList().stream()
                    .map(input -> new RexInputRef(input.getIndex(), input.getType()))
                    .collect(Collectors.toList());
            HiveParserRowResolver obSyntheticProjectRR = new HiveParserRowResolver();
            if (!HiveParserRowResolver.add(obSyntheticProjectRR, inputRR)) {
                throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
            }
            int vcolPos = inputRR.getRowSchema().getSignature().size();
            for (Pair<HiveParserASTNode, TypeInfo> astTypePair : vcASTAndType) {
                obSyntheticProjectRR.putExpression(astTypePair.getKey(),
                        new ColumnInfo(getColumnInternalName(vcolPos), astTypePair.getValue(), null, false));
                vcolPos++;
            }
            obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, virtualCols),
                    obSyntheticProjectRR, srcRel);
            if (outermostOB) {
                if (!HiveParserRowResolver.add(outputRR, inputRR)) {
                    throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
                }
            } else {
                if (!HiveParserRowResolver.add(outputRR, obSyntheticProjectRR)) {
                    throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
                }
            }
            originalOBInput = srcRel;
        } else {
            if (!HiveParserRowResolver.add(outputRR, inputRR)) {
                throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
            }
        }
        // 4. Construct SortRel
        RelTraitSet traitSet = cluster.traitSet();
        RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
        sortRel = LogicalSort.create(obInputRel, canonizedCollation, null, null);
        // 5. Update the maps
        Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
        relToRowResolver.put(sortRel, outputRR);
        relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
    }
    return new Pair<>(sortRel, originalOBInput);
}
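For reference, the collation entries built in steps 2.4/2.5 are plain Calcite objects. A small standalone sketch (hypothetical example, independent of the planner, using the non-deprecated RelCollations factory) of what a single ORDER BY col0 DESC NULLS LAST entry boils down to:

import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;

public class CollationSketch {
    public static void main(String[] args) {
        // field index 0, descending, nulls last -- mirrors one iteration of the loop above
        RelFieldCollation fieldCollation = new RelFieldCollation(
                0, RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.LAST);
        RelCollation collation = RelCollations.of(fieldCollation);
        System.out.println(collation); // e.g. [0 DESC-nulls-last]
    }
}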
use of org.apache.hadoop.hive.serde2.ByteStream.Output in project flink by apache.
the class HiveParserCalcitePlanner method genUDTFPlan.
private RelNode genUDTFPlan(SqlOperator sqlOperator, String genericUDTFName, String outputTableAlias,
        List<String> colAliases, HiveParserQB qb, List<RexNode> operands, List<ColumnInfo> opColInfos,
        RelNode input, boolean inSelect, boolean isOuter) throws SemanticException {
    Preconditions.checkState(!isOuter || !inSelect, "OUTER is not supported for SELECT UDTF");
    // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    if (inSelect && !qbp.getDestToGroupBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToDistributeBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToSortBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToClusterBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
    }
    if (inSelect && !qbp.getAliasToLateralViews().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
    }
    LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
    // Create the object inspector for the input columns and initialize the UDTF
    RelDataType relDataType =
            HiveParserUtils.inferReturnTypeForOperands(sqlOperator, operands, cluster.getTypeFactory());
    DataType dataType = HiveParserUtils.toDataType(relDataType);
    StructObjectInspector outputOI = (StructObjectInspector) HiveInspectors.getObjectInspector(
            HiveTypeUtil.toHiveTypeInfo(dataType, false));
    // this should only happen for select udtf
    if (outputTableAlias == null) {
        Preconditions.checkState(inSelect, "Table alias not specified for lateral view");
        String prefix = "select_" + genericUDTFName + "_alias_";
        int i = 0;
        while (qb.getAliases().contains(prefix + i)) {
            i++;
        }
        outputTableAlias = prefix + i;
    }
    if (colAliases.isEmpty()) {
        // user did not specify alias names, infer names from outputOI
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            colAliases.add(field.getFieldName());
        }
    }
    // Make sure that the number of column aliases in the AS clause matches the number of
    // columns output by the UDTF
    int numOutputCols = outputOI.getAllStructFieldRefs().size();
    int numSuppliedAliases = colAliases.size();
    if (numOutputCols != numSuppliedAliases) {
        throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg(
                "expected " + numOutputCols + " aliases " + "but got " + numSuppliedAliases));
    }
    // Generate the output column infos / row resolver using internal names.
    ArrayList<ColumnInfo> udtfOutputCols = new ArrayList<>();
    Iterator<String> colAliasesIter = colAliases.iterator();
    for (StructField sf : outputOI.getAllStructFieldRefs()) {
        String colAlias = colAliasesIter.next();
        assert (colAlias != null);
        // Since the UDTF operator feeds into a LVJ operator that will rename all the internal
        // names, we can just use the field name from the UDTF's OI as the internal name.
        ColumnInfo col = new ColumnInfo(sf.getFieldName(),
                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                outputTableAlias, false);
        udtfOutputCols.add(col);
    }
    // Create the row resolver for the table function scan
    HiveParserRowResolver udtfOutRR = new HiveParserRowResolver();
    for (int i = 0; i < udtfOutputCols.size(); i++) {
        udtfOutRR.put(outputTableAlias, colAliases.get(i), udtfOutputCols.get(i));
    }
    // Build row type from field <type, name>
    RelDataType retType = HiveParserTypeConverter.getType(cluster, udtfOutRR, null);
    List<RelDataType> argTypes = new ArrayList<>();
    RelDataTypeFactory dtFactory = cluster.getRexBuilder().getTypeFactory();
    for (ColumnInfo ci : opColInfos) {
        argTypes.add(HiveParserUtils.toRelDataType(ci.getType(), dtFactory));
    }
    SqlOperator calciteOp =
            HiveParserSqlFunctionConverter.getCalciteFn(genericUDTFName, argTypes, retType, false);
    RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, operands);
    // convert the rex call
    TableFunctionConverter udtfConverter = new TableFunctionConverter(cluster, input,
            frameworkConfig.getOperatorTable(), catalogReader.nameMatcher());
    RexCall convertedCall = (RexCall) rexNode.accept(udtfConverter);
    SqlOperator convertedOperator = convertedCall.getOperator();
    Preconditions.checkState(convertedOperator instanceof SqlUserDefinedTableFunction,
            "Expect operator to be " + SqlUserDefinedTableFunction.class.getSimpleName()
                    + ", actually got " + convertedOperator.getClass().getSimpleName());
    // TODO: how to decide this?
    Type elementType = Object[].class;
    // create LogicalTableFunctionScan
    RelNode tableFunctionScan = LogicalTableFunctionScan.create(input.getCluster(), Collections.emptyList(),
            convertedCall, elementType, retType, null);
    // remember the table alias for the UDTF so that we can reference the cols later
    qb.addAlias(outputTableAlias);
    RelNode correlRel;
    RexBuilder rexBuilder = cluster.getRexBuilder();
    // find correlation in the converted call
    Pair<List<CorrelationId>, ImmutableBitSet> correlUse = getCorrelationUse(convertedCall);
    // create correlate node
    if (correlUse == null) {
        correlRel = plannerContext
                .createRelBuilder(catalogManager.getCurrentCatalog(), catalogManager.getCurrentDatabase())
                .push(input)
                .push(tableFunctionScan)
                .join(isOuter ? JoinRelType.LEFT : JoinRelType.INNER, rexBuilder.makeLiteral(true))
                .build();
    } else {
        if (correlUse.left.size() > 1) {
            tableFunctionScan = DeduplicateCorrelateVariables.go(rexBuilder, correlUse.left.get(0),
                    Util.skip(correlUse.left), tableFunctionScan);
        }
        correlRel = LogicalCorrelate.create(input, tableFunctionScan, correlUse.left.get(0), correlUse.right,
                isOuter ? JoinRelType.LEFT : JoinRelType.INNER);
    }
    // Add new rel & its RR to the maps
    relToHiveColNameCalcitePosMap.put(tableFunctionScan, buildHiveToCalciteColumnMap(udtfOutRR));
    relToRowResolver.put(tableFunctionScan, udtfOutRR);
    HiveParserRowResolver correlRR = HiveParserRowResolver.getCombinedRR(
            relToRowResolver.get(input), relToRowResolver.get(tableFunctionScan));
    relToHiveColNameCalcitePosMap.put(correlRel, buildHiveToCalciteColumnMap(correlRR));
    relToRowResolver.put(correlRel, correlRR);
    if (!inSelect) {
        return correlRel;
    }
    // create project node
    List<RexNode> projects = new ArrayList<>();
    HiveParserRowResolver projectRR = new HiveParserRowResolver();
    int j = 0;
    for (int i = input.getRowType().getFieldCount(); i < correlRel.getRowType().getFieldCount(); i++) {
        projects.add(cluster.getRexBuilder().makeInputRef(correlRel, i));
        ColumnInfo inputColInfo = correlRR.getRowSchema().getSignature().get(i);
        String colAlias = inputColInfo.getAlias();
        ColumnInfo colInfo =
                new ColumnInfo(getColumnInternalName(j++), inputColInfo.getObjectInspector(), null, false);
        projectRR.put(null, colAlias, colInfo);
    }
    RelNode projectNode = LogicalProject.create(correlRel, Collections.emptyList(), projects,
            tableFunctionScan.getRowType());
    relToHiveColNameCalcitePosMap.put(projectNode, buildHiveToCalciteColumnMap(projectRR));
    relToRowResolver.put(projectNode, projectRR);
    return projectNode;
}
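The final Project exposes only the UDTF output columns and renames them with internal names via getColumnInternalName(j++). A hedged sketch of what that naming amounts to, assuming Hive's usual "_col<N>" convention; the helper below is a hypothetical stand-in, not the real method:

public class InternalNameSketch {
    // simplified stand-in for the planner's getColumnInternalName (assumption: "_col" + position)
    static String getColumnInternalName(int pos) {
        return "_col" + pos;
    }

    public static void main(String[] args) {
        // consecutive UDTF output columns get consecutive internal names starting from 0
        for (int j = 0; j < 3; j++) {
            System.out.println(getColumnInternalName(j)); // _col0, _col1, _col2
        }
    }
}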
use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
the class AccumuloRowSerializer method serializeRowId.
/**
 * Serialize an Accumulo rowid.
 */
protected byte[] serializeRowId(Object rowId, StructField rowIdField, ColumnMapping rowIdMapping)
        throws IOException {
    if (rowId == null) {
        throw new IOException("Accumulo rowId cannot be NULL");
    }
    // Reset the buffer we're going to use
    output.reset();
    ObjectInspector rowIdFieldOI = rowIdField.getFieldObjectInspector();
    String rowIdMappingType = rowIdMapping.getColumnType();
    TypeInfo rowIdTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(rowIdMappingType);
    if (!rowIdFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)
            && rowIdTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
        // we always serialize the String type using the escaped algorithm for LazyString
        writeString(output, SerDeUtils.getJSONString(rowId, rowIdFieldOI),
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return output.toByteArray();
    }
    // use the serialization option switch to write primitive values as either a variable
    // length UTF8 string or a fixed width bytes if serializing in binary format
    getSerializedValue(rowIdFieldOI, rowId, output, rowIdMapping);
    return output.toByteArray();
}
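The serializer reuses a single ByteStream.Output buffer per instance, hence the reset() at the top of the method. A minimal usage sketch, assuming only that ByteStream.Output behaves like a resettable ByteArrayOutputStream; the class name is hypothetical:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.ByteStream;

public class OutputBufferSketch {
    public static void main(String[] args) throws Exception {
        ByteStream.Output output = new ByteStream.Output();
        // first row id
        output.write("row-1".getBytes(StandardCharsets.UTF_8));
        byte[] first = output.toByteArray();
        // reset before serializing the next row id, just as serializeRowId does
        output.reset();
        output.write("row-2".getBytes(StandardCharsets.UTF_8));
        byte[] second = output.toByteArray();
        System.out.println(first.length + " " + second.length); // 5 5
    }
}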
use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
the class AccumuloRowSerializer method writeWithLevel.
/**
 * Recursively serialize an Object using its {@link ObjectInspector}, respecting the
 * separators defined by the {@link LazySerDeParameters}.
 *
 * @param oi ObjectInspector for the current object
 * @param value The current object
 * @param output A buffer output is written to
 * @param mapping The mapping for this Hive column
 * @param level The current level/offset for the SerDe separator
 * @throws IOException
 */
protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Output output,
        ColumnMapping mapping, int level) throws IOException {
    switch (oi.getCategory()) {
        case PRIMITIVE:
            if (mapping.getEncoding() == ColumnEncoding.BINARY) {
                this.writeBinary(output, value, (PrimitiveObjectInspector) oi);
            } else {
                this.writeString(output, value, (PrimitiveObjectInspector) oi);
            }
            return;
        case LIST:
            char separator = (char) serDeParams.getSeparators()[level];
            ListObjectInspector loi = (ListObjectInspector) oi;
            List<?> list = loi.getList(value);
            ObjectInspector eoi = loi.getListElementObjectInspector();
            if (list == null) {
                log.debug("No objects found when serializing list");
                return;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        output.write(separator);
                    }
                    writeWithLevel(eoi, list.get(i), output, mapping, level + 1);
                }
            }
            return;
        case MAP:
            char sep = (char) serDeParams.getSeparators()[level];
            char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1];
            MapObjectInspector moi = (MapObjectInspector) oi;
            ObjectInspector koi = moi.getMapKeyObjectInspector();
            ObjectInspector voi = moi.getMapValueObjectInspector();
            Map<?, ?> map = moi.getMap(value);
            if (map == null) {
                log.debug("No object found when serializing map");
                return;
            } else {
                boolean first = true;
                for (Map.Entry<?, ?> entry : map.entrySet()) {
                    if (first) {
                        first = false;
                    } else {
                        output.write(sep);
                    }
                    writeWithLevel(koi, entry.getKey(), output, mapping, level + 2);
                    output.write(keyValueSeparator);
                    writeWithLevel(voi, entry.getValue(), output, mapping, level + 2);
                }
            }
            return;
        case STRUCT:
            sep = (char) serDeParams.getSeparators()[level];
            StructObjectInspector soi = (StructObjectInspector) oi;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            list = soi.getStructFieldsDataAsList(value);
            if (list == null) {
                log.debug("No object found when serializing struct");
                return;
            } else {
                for (int i = 0; i < list.size(); i++) {
                    if (i > 0) {
                        output.write(sep);
                    }
                    writeWithLevel(fields.get(i).getFieldObjectInspector(), list.get(i), output, mapping,
                            level + 1);
                }
            }
            return;
        default:
            throw new RuntimeException("Unknown category type: " + oi.getCategory());
    }
}
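Because level indexes into the SerDe separator array, nesting one level deeper switches to the next separator in the hierarchy. A hedged illustration of the resulting layout, assuming LazySimpleSerDe-style defaults at two consecutive levels (\u0002 between entries, \u0003 between a key and its value); the real characters come from LazySerDeParameters.getSeparators() and the class name here is hypothetical:

public class SeparatorLayoutSketch {
    public static void main(String[] args) {
        // assumed defaults for two consecutive separator levels
        char entrySep = '\u0002';     // separators[level] between map entries
        char keyValueSep = '\u0003';  // separators[level + 1] between key and value

        // map {"a": 1, "b": 2} laid out the way writeWithLevel would emit it
        String serializedMap = "a" + keyValueSep + "1" + entrySep + "b" + keyValueSep + "2";
        // replace the control characters with visible stand-ins for printing
        System.out.println(serializedMap.replace(entrySep, '^').replace(keyValueSep, ':')); // a:1^b:2
    }
}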