
Example 6 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

Class HiveCatalogGenericMetadataTest, method testGenericTableWithoutConnectorProp:

@Test
public void testGenericTableWithoutConnectorProp() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    TableSchema tableSchema =
            TableSchema.builder()
                    .fields(
                            new String[] {"s", "ts"},
                            new DataType[] {DataTypes.STRING(), DataTypes.TIMESTAMP_LTZ(3)})
                    .watermark("ts", "ts-INTERVAL '1' SECOND", DataTypes.TIMESTAMP_LTZ(3))
                    .build();
    CatalogTable catalogTable = new CatalogTableImpl(tableSchema, Collections.emptyMap(), null);
    catalog.createTable(path1, catalogTable, false);
    CatalogTable retrievedTable = (CatalogTable) catalog.getTable(path1);
    assertEquals(tableSchema, retrievedTable.getSchema());
    assertEquals(Collections.emptyMap(), retrievedTable.getOptions());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Test(org.junit.Test)
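
For quick reference, here is a minimal standalone sketch of the same legacy TableSchema builder API, showing how a built schema (fields plus watermark) can be read back. The class name TableSchemaSketch is hypothetical; the builder, getFieldNames(), and getWatermarkSpecs() calls follow the usage shown above.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.WatermarkSpec;

public class TableSchemaSketch {
    public static void main(String[] args) {
        // Build a schema equivalent to the one in the test above.
        TableSchema schema =
                TableSchema.builder()
                        .field("s", DataTypes.STRING())
                        .field("ts", DataTypes.TIMESTAMP_LTZ(3))
                        .watermark("ts", "ts - INTERVAL '1' SECOND", DataTypes.TIMESTAMP_LTZ(3))
                        .build();

        // Read the schema back: field names and watermark specs.
        System.out.println(String.join(", ", schema.getFieldNames()));
        for (WatermarkSpec spec : schema.getWatermarkSpecs()) {
            System.out.println(spec.getRowtimeAttribute() + " -> " + spec.getWatermarkExpr());
        }
    }
}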

Example 7 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

Class HiveCatalogUdfITCase, method testUdf:

private void testUdf(boolean batch) throws Exception {
    StreamExecutionEnvironment env = null;
    TableEnvironment tEnv;
    EnvironmentSettings.Builder settingsBuilder = EnvironmentSettings.newInstance();
    if (batch) {
        settingsBuilder.inBatchMode();
    } else {
        settingsBuilder.inStreamingMode();
    }
    if (batch) {
        tEnv = TableEnvironment.create(settingsBuilder.build());
    } else {
        env = StreamExecutionEnvironment.getExecutionEnvironment();
        tEnv = StreamTableEnvironment.create(env, settingsBuilder.build());
    }
    BatchTestBase.configForMiniCluster(tEnv.getConfig());
    tEnv.registerCatalog("myhive", hiveCatalog);
    tEnv.useCatalog("myhive");
    String innerSql =
            format(
                    "select mygenericudf(myudf(name), 1) as a, mygenericudf(myudf(age), 1) as b,"
                            + " s from %s, lateral table(myudtf(name, 1)) as T(s)",
                    sourceTableName);
    String selectSql = format("select a, s, sum(b), myudaf(b) from (%s) group by a, s", innerSql);
    List<String> results;
    if (batch) {
        Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
        final TableSchema sinkSchema =
                TableSchema.builder()
                        .field("name1", Types.STRING())
                        .field("name2", Types.STRING())
                        .field("sum1", Types.INT())
                        .field("sum2", Types.LONG())
                        .build();
        final Map<String, String> sinkOptions = new HashMap<>();
        sinkOptions.put("connector.type", "filesystem");
        sinkOptions.put("connector.path", p.toAbsolutePath().toString());
        sinkOptions.put("format.type", "csv");
        final CatalogTable sink = new CatalogTableImpl(sinkSchema, sinkOptions, "Comment.");
        hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
        tEnv.executeSql(format("insert into %s " + selectSql, sinkTableName)).await();
        // assert written result
        StringBuilder builder = new StringBuilder();
        try (Stream<Path> paths = Files.walk(Paths.get(p.toAbsolutePath().toString()))) {
            paths.filter(Files::isRegularFile).forEach(path -> {
                try {
                    String content = FileUtils.readFileUtf8(path.toFile());
                    if (content.isEmpty()) {
                        return;
                    }
                    builder.append(content);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        }
        results = Arrays.stream(builder.toString().split("\n")).filter(s -> !s.isEmpty()).collect(Collectors.toList());
    } else {
        StreamTableEnvironment streamTEnv = (StreamTableEnvironment) tEnv;
        TestingRetractSink sink = new TestingRetractSink();
        streamTEnv.toRetractStream(tEnv.sqlQuery(selectSql), Row.class).map(new JavaToScala()).addSink((SinkFunction) sink);
        env.execute("");
        results = JavaScalaConversionUtil.toJava(sink.getRetractResults());
    }
    results = new ArrayList<>(results);
    results.sort(String::compareTo);
    Assert.assertEquals(Arrays.asList("1,1,2,2", "2,2,4,4", "3,3,6,6"), results);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Path(java.nio.file.Path) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) TestingRetractSink(org.apache.flink.table.planner.runtime.utils.TestingRetractSink) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) CatalogTable(org.apache.flink.table.catalog.CatalogTable) IOException(java.io.IOException) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment)
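
Below is a minimal self-contained sketch of registering a comparable CSV filesystem sink as a CatalogTable. A GenericInMemoryCatalog stands in for the HiveCatalog used above, and the catalog name, table name, and path are hypothetical; the schema, option keys, and CatalogTableImpl constructor follow the test.

import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;
import org.apache.flink.table.catalog.GenericInMemoryCatalog;
import org.apache.flink.table.catalog.ObjectPath;

public class CsvSinkSketch {
    public static void main(String[] args) throws Exception {
        // In-memory catalog with the default database "default".
        GenericInMemoryCatalog catalog = new GenericInMemoryCatalog("mycatalog");
        catalog.open();

        TableSchema sinkSchema =
                TableSchema.builder()
                        .field("name1", DataTypes.STRING())
                        .field("sum1", DataTypes.INT())
                        .build();

        // Legacy descriptor-style connector options, as in the test above.
        Map<String, String> options = new HashMap<>();
        options.put("connector.type", "filesystem");
        options.put("connector.path", "/tmp/test.csv"); // hypothetical path
        options.put("format.type", "csv");

        CatalogTable sink = new CatalogTableImpl(sinkSchema, options, "csv sink");
        catalog.createTable(new ObjectPath("default", "csv_sink"), sink, false);
    }
}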

Example 8 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

Class HiveParserDDLSemanticAnalyzer, method convertAlterTableModifyCols:

private Operation convertAlterTableModifyCols(
        CatalogBaseTable alteredTable, String tblName, HiveParserASTNode ast, boolean replace)
        throws SemanticException {
    List<FieldSchema> newCols = HiveParserBaseSemanticAnalyzer.getColumns((HiveParserASTNode) ast.getChild(0));
    boolean isCascade = false;
    if (null != ast.getFirstChildWithType(HiveASTParser.TOK_CASCADE)) {
        isCascade = true;
    }
    ObjectIdentifier tableIdentifier = parseObjectIdentifier(tblName);
    CatalogTable oldTable = (CatalogTable) alteredTable;
    // prepare properties
    Map<String, String> props = new HashMap<>(oldTable.getOptions());
    props.put(ALTER_TABLE_OP, ALTER_COLUMNS.name());
    if (isCascade) {
        props.put(ALTER_COL_CASCADE, "true");
    }
    TableSchema oldSchema = oldTable.getSchema();
    final int numPartCol = oldTable.getPartitionKeys().size();
    TableSchema.Builder builder = TableSchema.builder();
    // add existing non-part col if we're not replacing
    if (!replace) {
        List<TableColumn> nonPartCols = oldSchema.getTableColumns().subList(0, oldSchema.getFieldCount() - numPartCol);
        for (TableColumn column : nonPartCols) {
            builder.add(column);
        }
        setWatermarkAndPK(builder, oldSchema);
    }
    // add new cols
    for (FieldSchema col : newCols) {
        builder.add(TableColumn.physical(col.getName(), HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(col.getType()))));
    }
    // add part cols
    List<TableColumn> partCols = oldSchema.getTableColumns().subList(oldSchema.getFieldCount() - numPartCol, oldSchema.getFieldCount());
    for (TableColumn column : partCols) {
        builder.add(column);
    }
    return new AlterTableSchemaOperation(
            tableIdentifier,
            new CatalogTableImpl(
                    builder.build(), oldTable.getPartitionKeys(), props, oldTable.getComment()));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) CatalogTable(org.apache.flink.table.catalog.CatalogTable) TableColumn(org.apache.flink.table.api.TableColumn) NotNullConstraint(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.NotNullConstraint) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) AlterTableSchemaOperation(org.apache.flink.table.operations.ddl.AlterTableSchemaOperation) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier)
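
Here is a minimal sketch of the copy-then-append pattern at the heart of convertAlterTableModifyCols: copy the existing columns into a fresh builder, then add new physical columns. The class name and column names are made up for illustration; TableColumn.physical and TableSchema.Builder.add are the calls used above.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableColumn;
import org.apache.flink.table.api.TableSchema;

public class AddColumnSketch {
    public static void main(String[] args) {
        TableSchema oldSchema =
                TableSchema.builder()
                        .field("id", DataTypes.INT())
                        .field("name", DataTypes.STRING())
                        .build();

        // Copy existing columns, then append a new physical column.
        TableSchema.Builder builder = TableSchema.builder();
        for (TableColumn column : oldSchema.getTableColumns()) {
            builder.add(column);
        }
        builder.add(TableColumn.physical("new_col", DataTypes.BIGINT()));

        TableSchema newSchema = builder.build();
        System.out.println(newSchema);
    }
}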

Example 9 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

Class HiveParserDDLSemanticAnalyzer, method convertCreateTable:

private CreateTableOperation convertCreateTable(
        String compoundName,
        boolean isExternal,
        boolean ifNotExists,
        boolean isTemporary,
        List<FieldSchema> cols,
        List<FieldSchema> partCols,
        String comment,
        String location,
        Map<String, String> tblProps,
        HiveParserRowFormatParams rowFormatParams,
        HiveParserStorageFormat storageFormat,
        List<PrimaryKey> primaryKeys,
        List<NotNullConstraint> notNullConstraints) {
    Map<String, String> props = new HashMap<>();
    if (tblProps != null) {
        props.putAll(tblProps);
    }
    markHiveConnector(props);
    // external
    if (isExternal) {
        props.put(TABLE_IS_EXTERNAL, "true");
    }
    // PK trait
    UniqueConstraint uniqueConstraint = null;
    if (primaryKeys != null && !primaryKeys.isEmpty()) {
        PrimaryKey primaryKey = primaryKeys.get(0);
        byte trait = 0;
        if (primaryKey.isEnable()) {
            trait = HiveDDLUtils.enableConstraint(trait);
        }
        if (primaryKey.isValidate()) {
            trait = HiveDDLUtils.validateConstraint(trait);
        }
        if (primaryKey.isRely()) {
            trait = HiveDDLUtils.relyConstraint(trait);
        }
        props.put(PK_CONSTRAINT_TRAIT, String.valueOf(trait));
        List<String> pkCols = primaryKeys.stream().map(PrimaryKey::getPk).collect(Collectors.toList());
        String constraintName = primaryKey.getConstraintName();
        if (constraintName == null) {
            constraintName = pkCols.stream().collect(Collectors.joining("_", "PK_", ""));
        }
        uniqueConstraint = UniqueConstraint.primaryKey(constraintName, pkCols);
    }
    // NOT NULL constraints
    List<String> notNullCols = new ArrayList<>();
    if (!notNullConstraints.isEmpty()) {
        List<String> traits = new ArrayList<>();
        for (NotNullConstraint notNull : notNullConstraints) {
            byte trait = 0;
            if (notNull.isEnable()) {
                trait = HiveDDLUtils.enableConstraint(trait);
            }
            if (notNull.isValidate()) {
                trait = HiveDDLUtils.validateConstraint(trait);
            }
            if (notNull.isRely()) {
                trait = HiveDDLUtils.relyConstraint(trait);
            }
            traits.add(String.valueOf(trait));
            notNullCols.add(notNull.getColName());
        }
        props.put(NOT_NULL_CONSTRAINT_TRAITS, String.join(COL_DELIMITER, traits));
        props.put(NOT_NULL_COLS, String.join(COL_DELIMITER, notNullCols));
    }
    // row format
    if (rowFormatParams != null) {
        encodeRowFormat(rowFormatParams, props);
    }
    // storage format
    if (storageFormat != null) {
        encodeStorageFormat(storageFormat, props);
    }
    // location
    if (location != null) {
        props.put(TABLE_LOCATION_URI, location);
    }
    ObjectIdentifier identifier = parseObjectIdentifier(compoundName);
    Set<String> notNullColSet = new HashSet<>(notNullCols);
    if (uniqueConstraint != null) {
        notNullColSet.addAll(uniqueConstraint.getColumns());
    }
    TableSchema tableSchema = HiveTableUtil.createTableSchema(cols, partCols, notNullColSet, uniqueConstraint);
    return new CreateTableOperation(
            identifier,
            new CatalogTableImpl(tableSchema, HiveCatalog.getFieldNames(partCols), props, comment),
            ifNotExists,
            isTemporary);
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) PrimaryKey(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.PrimaryKey) CreateTableOperation(org.apache.flink.table.operations.ddl.CreateTableOperation) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) NotNullConstraint(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.NotNullConstraint) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) HashSet(java.util.HashSet)
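
A minimal sketch of the default primary-key name derivation used above: "PK_" plus the key columns joined with "_". The column names are made up; UniqueConstraint.primaryKey is the same factory the analyzer calls.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.flink.table.api.constraints.UniqueConstraint;

public class PkNameSketch {
    public static void main(String[] args) {
        List<String> pkCols = Arrays.asList("id", "name");
        // Two key columns yield the constraint name "PK_id_name".
        String constraintName = pkCols.stream().collect(Collectors.joining("_", "PK_", ""));
        UniqueConstraint pk = UniqueConstraint.primaryKey(constraintName, pkCols);
        System.out.println(pk.getName() + " -> " + pk.getColumns());
    }
}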

Example 10 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

Class HiveCatalog, method instantiateCatalogTable:

@VisibleForTesting
CatalogBaseTable instantiateCatalogTable(Table hiveTable) {
    boolean isView = TableType.valueOf(hiveTable.getTableType()) == TableType.VIRTUAL_VIEW;
    // Table properties
    Map<String, String> properties = new HashMap<>(hiveTable.getParameters());
    boolean isHiveTable = isHiveTable(properties);
    TableSchema tableSchema;
    // Partition keys
    List<String> partitionKeys = new ArrayList<>();
    if (isHiveTable) {
        // Table schema
        tableSchema = HiveTableUtil.createTableSchema(hiveConf, hiveTable, client, hiveShim);
        if (!hiveTable.getPartitionKeys().isEmpty()) {
            partitionKeys = getFieldNames(hiveTable.getPartitionKeys());
        }
    } else {
        properties = retrieveFlinkProperties(properties);
        if (ManagedTableFactory.DEFAULT_IDENTIFIER.equalsIgnoreCase(properties.get(CONNECTOR.key()))) {
            // for Flink's managed table, we remove the connector option
            properties.remove(CONNECTOR.key());
        }
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putProperties(properties);
        // try to get table schema with both new and old (1.10) key, in order to support tables
        // created in old version
        tableSchema =
                tableSchemaProps
                        .getOptionalTableSchema(Schema.SCHEMA)
                        .orElseGet(
                                () ->
                                        tableSchemaProps
                                                .getOptionalTableSchema("generic.table.schema")
                                                .orElseGet(() -> TableSchema.builder().build()));
        partitionKeys = tableSchemaProps.getPartitionKeys();
        // remove the schema from properties
        properties = CatalogTableImpl.removeRedundant(properties, tableSchema, partitionKeys);
    }
    String comment = properties.remove(HiveCatalogConfig.COMMENT);
    if (isView) {
        return new CatalogViewImpl(hiveTable.getViewOriginalText(), hiveTable.getViewExpandedText(), tableSchema, properties, comment);
    } else {
        return new CatalogTableImpl(tableSchema, partitionKeys, properties, comment);
    }
}
Also used : CatalogViewImpl(org.apache.flink.table.catalog.CatalogViewImpl) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) DescriptorProperties(org.apache.flink.table.descriptors.DescriptorProperties) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) ArrayList(java.util.ArrayList) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)
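
A minimal sketch of the schema round-trip that instantiateCatalogTable relies on: a TableSchema serialized into string properties under the "schema" key and read back via DescriptorProperties. The field names are made up; putTableSchema and getOptionalTableSchema are existing DescriptorProperties methods, with getOptionalTableSchema appearing in the code above.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.descriptors.DescriptorProperties;
import org.apache.flink.table.descriptors.Schema;

public class SchemaRoundTripSketch {
    public static void main(String[] args) {
        TableSchema schema =
                TableSchema.builder()
                        .field("id", DataTypes.INT())
                        .field("name", DataTypes.STRING())
                        .build();

        // Serialize the schema into flat string properties under "schema.*" keys...
        DescriptorProperties props = new DescriptorProperties(true);
        props.putTableSchema(Schema.SCHEMA, schema);

        // ...and restore it, falling back to an empty schema if absent.
        TableSchema restored =
                props.getOptionalTableSchema(Schema.SCHEMA)
                        .orElseGet(() -> TableSchema.builder().build());
        System.out.println(restored);
    }
}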

Aggregations

TableSchema (org.apache.flink.table.api.TableSchema): 86
Test (org.junit.Test): 54
HashMap (java.util.HashMap): 26
CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 21
SqlNode (org.apache.calcite.sql.SqlNode): 19
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 18
DataType (org.apache.flink.table.types.DataType): 16
ValidationException (org.apache.flink.table.api.ValidationException): 14
TableColumn (org.apache.flink.table.api.TableColumn): 10
UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint): 10
ArrayList (java.util.ArrayList): 9
List (java.util.List): 9
Map (java.util.Map): 9
FeatureOption (org.apache.flink.sql.parser.ddl.SqlTableLike.FeatureOption): 9
MergingStrategy (org.apache.flink.sql.parser.ddl.SqlTableLike.MergingStrategy): 9
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 8
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 8
Arrays (java.util.Arrays): 7
Configuration (org.apache.flink.configuration.Configuration): 7