
Example 6 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From the class RegistryAvroRowDataSeDeSchemaTest, method testRowDataWriteReadWithSchema.

private void testRowDataWriteReadWithSchema(Schema schema) throws Exception {
    DataType dataType = AvroSchemaConverter.convertToDataType(schema.toString());
    RowType rowType = (RowType) dataType.getLogicalType();
    AvroRowDataSerializationSchema serializer = getSerializationSchema(rowType, schema);
    Schema writeSchema = AvroSchemaConverter.convertToSchema(dataType.getLogicalType());
    AvroRowDataDeserializationSchema deserializer = getDeserializationSchema(rowType, writeSchema);
    serializer.open(null);
    deserializer.open(null);
    assertNull(deserializer.deserialize(null));
    // round trip: POJO -> RowData -> Avro bytes -> RowData
    RowData oriData = address2RowData(address);
    byte[] serialized = serializer.serialize(oriData);
    RowData rowData = deserializer.deserialize(serialized);
    assertThat(rowData.getArity(), equalTo(schema.getFields().size()));
    assertEquals(address.getNum(), rowData.getInt(0));
    assertEquals(address.getStreet(), rowData.getString(1).toString());
    if (schema != ADDRESS_SCHEMA_COMPATIBLE) {
        assertEquals(address.getCity(), rowData.getString(2).toString());
        assertEquals(address.getState(), rowData.getString(3).toString());
        assertEquals(address.getZip(), rowData.getString(4).toString());
    }
}
Also used: AvroRowDataSerializationSchema (org.apache.flink.formats.avro.AvroRowDataSerializationSchema), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), RegistryAvroDeserializationSchema (org.apache.flink.formats.avro.RegistryAvroDeserializationSchema), AvroRowDataDeserializationSchema (org.apache.flink.formats.avro.AvroRowDataDeserializationSchema), Schema (org.apache.avro.Schema), RegistryAvroSerializationSchema (org.apache.flink.formats.avro.RegistryAvroSerializationSchema), DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType)
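
For orientation, a minimal, self-contained sketch of the conversion chain this test exercises: an Avro schema string to a Flink DataType via AvroSchemaConverter, and the logical type back to an Avro schema. The record example.Address and its fields are invented for illustration.

import org.apache.avro.Schema;
import org.apache.flink.formats.avro.typeutils.AvroSchemaConverter;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

public class AvroDataTypeRoundTrip {
    public static void main(String[] args) {
        // hypothetical record; the test uses generated Address classes instead
        Schema avroSchema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Address\",\"namespace\":\"example\","
                        + "\"fields\":[{\"name\":\"num\",\"type\":\"int\"},"
                        + "{\"name\":\"street\",\"type\":\"string\"}]}");
        // Avro schema -> Flink DataType, whose logical type is a RowType
        DataType dataType = AvroSchemaConverter.convertToDataType(avroSchema.toString());
        RowType rowType = (RowType) dataType.getLogicalType();
        // Flink logical type -> Avro schema again, as the test does for writeSchema
        Schema writeSchema = AvroSchemaConverter.convertToSchema(dataType.getLogicalType());
        System.out.println(rowType.asSummaryString());
        System.out.println(writeSchema);
    }
}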

Example 7 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From the class RegistryAvroRowDataSeDeSchemaTest, method testRowDataReadWithNonRegistryAvro.

@Test
public void testRowDataReadWithNonRegistryAvro() throws Exception {
    DataType dataType = AvroSchemaConverter.convertToDataType(ADDRESS_SCHEMA.toString());
    RowType rowType = (RowType) dataType.getLogicalType();
    AvroRowDataDeserializationSchema deserializer = getDeserializationSchema(rowType, ADDRESS_SCHEMA);
    deserializer.open(null);
    client.register(SUBJECT, ADDRESS_SCHEMA);
    // writeRecord produces plain Avro bytes, without the registry wire-format prefix
    byte[] oriBytes = writeRecord(address, ADDRESS_SCHEMA);
    expectedEx.expect(IOException.class);
    expectedEx.expect(containsCause(new IOException("Unknown data format. Magic number does not match")));
    deserializer.deserialize(oriBytes);
}
Also used: DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType), IOException (java.io.IOException), AvroRowDataDeserializationSchema (org.apache.flink.formats.avro.AvroRowDataDeserializationSchema), Test (org.junit.Test)
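
The expected failure is about framing, not the payload: a registry-aware deserializer first checks for the Confluent wire format, a magic byte 0x0 followed by a 4-byte schema id, before reading the Avro body, and writeRecord above emits plain Avro with no such prefix. A rough sketch of that framing check (not Flink's actual implementation):

import java.io.IOException;
import java.nio.ByteBuffer;

public class WireFormatCheck {
    private static final byte MAGIC_BYTE = 0x0;

    // returns the schema id if the prefix is present, else fails like the test expects
    static int readSchemaId(byte[] message) throws IOException {
        ByteBuffer buffer = ByteBuffer.wrap(message);
        if (buffer.get() != MAGIC_BYTE) {
            throw new IOException("Unknown data format. Magic number does not match");
        }
        return buffer.getInt();
    }
}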

Example 8 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From the class AvroFormatFactory, method createDecodingFormat.

@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    return new ProjectableDecodingFormat<DeserializationSchema<RowData>>() {

        @Override
        public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType physicalDataType, int[][] projections) {
            // apply projection push-down to the physical type before deriving the row type
            final DataType producedDataType = Projection.of(projections).project(physicalDataType);
            final RowType rowType = (RowType) producedDataType.getLogicalType();
            final TypeInformation<RowData> rowDataTypeInfo = context.createTypeInformation(producedDataType);
            return new AvroRowDataDeserializationSchema(rowType, rowDataTypeInfo);
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
Also used: RowData (org.apache.flink.table.data.RowData), ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat), DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType)
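
To make the projections argument concrete, here is a hedged sketch (field names invented) of how Projection.of turns index paths into the produced DataType; each int[] is a path from a top-level field down into nested rows:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.Projection;
import org.apache.flink.table.types.DataType;

public class ProjectionSketch {
    public static void main(String[] args) {
        DataType physical = DataTypes.ROW(
                DataTypes.FIELD("id", DataTypes.BIGINT()),
                DataTypes.FIELD("address", DataTypes.ROW(
                        DataTypes.FIELD("street", DataTypes.STRING()),
                        DataTypes.FIELD("zip", DataTypes.STRING()))));
        // keep only address.street and id, in that order
        int[][] projections = {{1, 0}, {0}};
        DataType produced = Projection.of(projections).project(physical);
        // roughly ROW<street STRING, id BIGINT>; exact field naming may differ
        System.out.println(produced);
    }
}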

Example 9 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From the class HiveParserCalcitePlanner, method genUDTFPlan.

private RelNode genUDTFPlan(SqlOperator sqlOperator, String genericUDTFName, String outputTableAlias, List<String> colAliases, HiveParserQB qb, List<RexNode> operands, List<ColumnInfo> opColInfos, RelNode input, boolean inSelect, boolean isOuter) throws SemanticException {
    Preconditions.checkState(!isOuter || !inSelect, "OUTER is not supported for SELECT UDTF");
    // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    if (inSelect && !qbp.getDestToGroupBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToDistributeBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToSortBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
    }
    if (inSelect && !qbp.getDestToClusterBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
    }
    if (inSelect && !qbp.getAliasToLateralViews().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
    }
    LOG.debug("Table alias: {} Col aliases: {}", outputTableAlias, colAliases);
    // Create the object inspector for the input columns and initialize the UDTF
    RelDataType relDataType = HiveParserUtils.inferReturnTypeForOperands(sqlOperator, operands, cluster.getTypeFactory());
    DataType dataType = HiveParserUtils.toDataType(relDataType);
    StructObjectInspector outputOI = (StructObjectInspector) HiveInspectors.getObjectInspector(HiveTypeUtil.toHiveTypeInfo(dataType, false));
    // This should only happen for a SELECT UDTF.
    if (outputTableAlias == null) {
        Preconditions.checkState(inSelect, "Table alias not specified for lateral view");
        String prefix = "select_" + genericUDTFName + "_alias_";
        int i = 0;
        while (qb.getAliases().contains(prefix + i)) {
            i++;
        }
        outputTableAlias = prefix + i;
    }
    if (colAliases.isEmpty()) {
        // user did not specify alias names, infer names from outputOI
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            colAliases.add(field.getFieldName());
        }
    }
    // Make sure that the number of column aliases in the AS clause matches the number of
    // columns output by the UDTF
    int numOutputCols = outputOI.getAllStructFieldRefs().size();
    int numSuppliedAliases = colAliases.size();
    if (numOutputCols != numSuppliedAliases) {
        throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numOutputCols + " aliases " + "but got " + numSuppliedAliases));
    }
    // Generate the output column infos / row resolver using internal names.
    ArrayList<ColumnInfo> udtfOutputCols = new ArrayList<>();
    Iterator<String> colAliasesIter = colAliases.iterator();
    for (StructField sf : outputOI.getAllStructFieldRefs()) {
        String colAlias = colAliasesIter.next();
        assert (colAlias != null);
        // Since the UDTF operator feeds into an LVJ (lateral view join) operator that will
        // rename all the internal names, we can just use the field name from the UDTF's OI
        // as the internal name.
        ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
        udtfOutputCols.add(col);
    }
    // Create the row resolver for the table function scan
    HiveParserRowResolver udtfOutRR = new HiveParserRowResolver();
    for (int i = 0; i < udtfOutputCols.size(); i++) {
        udtfOutRR.put(outputTableAlias, colAliases.get(i), udtfOutputCols.get(i));
    }
    // Build row type from field <type, name>
    RelDataType retType = HiveParserTypeConverter.getType(cluster, udtfOutRR, null);
    List<RelDataType> argTypes = new ArrayList<>();
    RelDataTypeFactory dtFactory = cluster.getRexBuilder().getTypeFactory();
    for (ColumnInfo ci : opColInfos) {
        argTypes.add(HiveParserUtils.toRelDataType(ci.getType(), dtFactory));
    }
    SqlOperator calciteOp = HiveParserSqlFunctionConverter.getCalciteFn(genericUDTFName, argTypes, retType, false);
    RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, operands);
    // convert the rex call
    TableFunctionConverter udtfConverter = new TableFunctionConverter(cluster, input, frameworkConfig.getOperatorTable(), catalogReader.nameMatcher());
    RexCall convertedCall = (RexCall) rexNode.accept(udtfConverter);
    SqlOperator convertedOperator = convertedCall.getOperator();
    Preconditions.checkState(convertedOperator instanceof SqlUserDefinedTableFunction, "Expect operator to be " + SqlUserDefinedTableFunction.class.getSimpleName() + ", actually got " + convertedOperator.getClass().getSimpleName());
    // TODO: how to decide this?
    Type elementType = Object[].class;
    // create LogicalTableFunctionScan
    RelNode tableFunctionScan = LogicalTableFunctionScan.create(input.getCluster(), Collections.emptyList(), convertedCall, elementType, retType, null);
    // remember the table alias for the UDTF so that we can reference the cols later
    qb.addAlias(outputTableAlias);
    RelNode correlRel;
    RexBuilder rexBuilder = cluster.getRexBuilder();
    // find correlation in the converted call
    Pair<List<CorrelationId>, ImmutableBitSet> correlUse = getCorrelationUse(convertedCall);
    // create correlate node
    if (correlUse == null) {
        correlRel = plannerContext.createRelBuilder(catalogManager.getCurrentCatalog(), catalogManager.getCurrentDatabase()).push(input).push(tableFunctionScan).join(isOuter ? JoinRelType.LEFT : JoinRelType.INNER, rexBuilder.makeLiteral(true)).build();
    } else {
        if (correlUse.left.size() > 1) {
            tableFunctionScan = DeduplicateCorrelateVariables.go(rexBuilder, correlUse.left.get(0), Util.skip(correlUse.left), tableFunctionScan);
        }
        correlRel = LogicalCorrelate.create(input, tableFunctionScan, correlUse.left.get(0), correlUse.right, isOuter ? JoinRelType.LEFT : JoinRelType.INNER);
    }
    // Add new rel & its RR to the maps
    relToHiveColNameCalcitePosMap.put(tableFunctionScan, buildHiveToCalciteColumnMap(udtfOutRR));
    relToRowResolver.put(tableFunctionScan, udtfOutRR);
    HiveParserRowResolver correlRR = HiveParserRowResolver.getCombinedRR(relToRowResolver.get(input), relToRowResolver.get(tableFunctionScan));
    relToHiveColNameCalcitePosMap.put(correlRel, buildHiveToCalciteColumnMap(correlRR));
    relToRowResolver.put(correlRel, correlRR);
    if (!inSelect) {
        return correlRel;
    }
    // create project node
    List<RexNode> projects = new ArrayList<>();
    HiveParserRowResolver projectRR = new HiveParserRowResolver();
    int j = 0;
    for (int i = input.getRowType().getFieldCount(); i < correlRel.getRowType().getFieldCount(); i++) {
        projects.add(cluster.getRexBuilder().makeInputRef(correlRel, i));
        ColumnInfo inputColInfo = correlRR.getRowSchema().getSignature().get(i);
        String colAlias = inputColInfo.getAlias();
        ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(j++), inputColInfo.getObjectInspector(), null, false);
        projectRR.put(null, colAlias, colInfo);
    }
    RelNode projectNode = LogicalProject.create(correlRel, Collections.emptyList(), projects, tableFunctionScan.getRowType());
    relToHiveColNameCalcitePosMap.put(projectNode, buildHiveToCalciteColumnMap(projectRR));
    relToRowResolver.put(projectNode, projectRR);
    return projectNode;
}
Also used: ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), SqlOperator (org.apache.calcite.sql.SqlOperator), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), RelDataType (org.apache.calcite.rel.type.RelDataType), RexCall (org.apache.calcite.rex.RexCall), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), SqlUserDefinedTableFunction (org.apache.calcite.sql.validate.SqlUserDefinedTableFunction), HiveParserRowResolver (org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver), RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory), DataType (org.apache.flink.table.types.DataType), RexBuilder (org.apache.calcite.rex.RexBuilder), CompositeList (org.apache.calcite.util.CompositeList), List (java.util.List), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), JoinType (org.apache.hadoop.hive.ql.parse.JoinType), JoinRelType (org.apache.calcite.rel.core.JoinRelType), HiveParserBaseSemanticAnalyzer.obtainTableType (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.obtainTableType), Type (java.lang.reflect.Type), TableType (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.TableType), HiveParserQBParseInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBParseInfo), RelNode (org.apache.calcite.rel.RelNode), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), RexNode (org.apache.calcite.rex.RexNode)
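
The alias-generation loop in the middle of genUDTFPlan is easy to miss, so here it is as a standalone sketch; the helper name and the Set parameter are ours, standing in for qb.getAliases():

import java.util.Set;

public class UdtfAliasSketch {
    // probe select_<udtf>_alias_<i> until the name is unused in the query block
    static String uniqueAlias(String genericUDTFName, Set<String> existingAliases) {
        String prefix = "select_" + genericUDTFName + "_alias_";
        int i = 0;
        while (existingAliases.contains(prefix + i)) {
            i++;
        }
        return prefix + i;
    }
}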

Example 10 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From the class HiveTableUtil, method createHiveColumns.

/**
 * Creates Hive columns from a Flink TableSchema.
 */
public static List<FieldSchema> createHiveColumns(TableSchema schema) {
    String[] fieldNames = schema.getFieldNames();
    DataType[] fieldTypes = schema.getFieldDataTypes();
    List<FieldSchema> columns = new ArrayList<>(fieldNames.length);
    for (int i = 0; i < fieldNames.length; i++) {
        columns.add(new FieldSchema(fieldNames[i], HiveTypeUtil.toHiveTypeInfo(fieldTypes[i], true).getTypeName(), null));
    }
    return columns;
}
Also used: FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), DataType (org.apache.flink.table.types.DataType), UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint)
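
A hypothetical caller might look like this; the schema fields are invented, and it assumes flink-connector-hive (for HiveTableUtil) and the Hive metastore API are on the classpath:

import java.util.List;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.hive.util.HiveTableUtil;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class HiveColumnsExample {
    public static void main(String[] args) {
        TableSchema schema = TableSchema.builder()
                .field("id", DataTypes.BIGINT())
                .field("name", DataTypes.STRING())
                .build();
        // each FieldSchema carries the Hive type name, e.g. "bigint", "string"
        List<FieldSchema> columns = HiveTableUtil.createHiveColumns(schema);
        columns.forEach(c -> System.out.println(c.getName() + ": " + c.getType()));
    }
}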

Aggregations

Usage counts (class: occurrences):

DataType (org.apache.flink.table.types.DataType): 260
Test (org.junit.Test): 72
RowType (org.apache.flink.table.types.logical.RowType): 59
LogicalType (org.apache.flink.table.types.logical.LogicalType): 58
RowData (org.apache.flink.table.data.RowData): 54
List (java.util.List): 38
FieldsDataType (org.apache.flink.table.types.FieldsDataType): 32
ValidationException (org.apache.flink.table.api.ValidationException): 31
ArrayList (java.util.ArrayList): 29
Collectors (java.util.stream.Collectors): 24
AtomicDataType (org.apache.flink.table.types.AtomicDataType): 24
Map (java.util.Map): 23
Internal (org.apache.flink.annotation.Internal): 23
TableException (org.apache.flink.table.api.TableException): 23
HashMap (java.util.HashMap): 22
GenericRowData (org.apache.flink.table.data.GenericRowData): 22
Row (org.apache.flink.types.Row): 22
TableSchema (org.apache.flink.table.api.TableSchema): 20
TypeConversions.fromLogicalToDataType (org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType): 19
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 18