Example 1 with Schema

Use of org.apache.flink.table.api.Schema in project flink by apache.

Source: class HiveCatalogHiveMetadataTest, method testViewCompatibility.

@Test
public void testViewCompatibility() throws Exception {
    // we always store view schema via properties now
    // make sure non-generic views created previously can still be used
    catalog.createDatabase(db1, createDb(), false);
    Table hiveView = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(path1.getDatabaseName(), path1.getObjectName());
    // mark as a view
    hiveView.setTableType(TableType.VIRTUAL_VIEW.name());
    final String originQuery = "view origin query";
    final String expandedQuery = "view expanded query";
    hiveView.setViewOriginalText(originQuery);
    hiveView.setViewExpandedText(expandedQuery);
    // set schema in SD
    Schema schema = Schema.newBuilder()
            .fromFields(
                    new String[] { "i", "s" },
                    new AbstractDataType[] { DataTypes.INT(), DataTypes.STRING() })
            .build();
    List<FieldSchema> fields = new ArrayList<>();
    for (Schema.UnresolvedColumn column : schema.getColumns()) {
        String name = column.getName();
        DataType type = (DataType) ((Schema.UnresolvedPhysicalColumn) column).getDataType();
        fields.add(new FieldSchema(name, HiveTypeUtil.toHiveTypeInfo(type, true).getTypeName(), null));
    }
    hiveView.getSd().setCols(fields);
    // test mark as non-generic with is_generic
    hiveView.getParameters().put(CatalogPropertiesUtil.IS_GENERIC, "false");
    // add some other properties
    hiveView.getParameters().put("k1", "v1");
    ((HiveCatalog) catalog).client.createTable(hiveView);
    CatalogBaseTable baseTable = catalog.getTable(path1);
    assertTrue(baseTable instanceof CatalogView);
    CatalogView catalogView = (CatalogView) baseTable;
    assertEquals(schema, catalogView.getUnresolvedSchema());
    assertEquals(originQuery, catalogView.getOriginalQuery());
    assertEquals(expandedQuery, catalogView.getExpandedQuery());
    assertEquals("v1", catalogView.getOptions().get("k1"));
    // test mark as non-generic with connector
    hiveView.setDbName(path3.getDatabaseName());
    hiveView.setTableName(path3.getObjectName());
    hiveView.getParameters().remove(CatalogPropertiesUtil.IS_GENERIC);
    hiveView.getParameters().put(CONNECTOR.key(), IDENTIFIER);
    ((HiveCatalog) catalog).client.createTable(hiveView);
    baseTable = catalog.getTable(path3);
    assertTrue(baseTable instanceof CatalogView);
    catalogView = (CatalogView) baseTable;
    assertEquals(schema, catalogView.getUnresolvedSchema());
    assertEquals(originQuery, catalogView.getOriginalQuery());
    assertEquals(expandedQuery, catalogView.getExpandedQuery());
    assertEquals("v1", catalogView.getOptions().get("k1"));
}
Also used: CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable), CatalogTable (org.apache.flink.table.catalog.CatalogTable), SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable), SqlAlterHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveTable), Table (org.apache.hadoop.hive.metastore.api.Table), AbstractDataType (org.apache.flink.table.types.AbstractDataType), Schema (org.apache.flink.table.api.Schema), TableSchema (org.apache.flink.table.api.TableSchema), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), DataType (org.apache.flink.table.types.DataType), CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString), CatalogView (org.apache.flink.table.catalog.CatalogView), Test (org.junit.Test)
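
For reference, the same two-column schema can also be declared column by column instead of via fromFields; column(...) belongs to the same Schema.Builder API. A minimal, self-contained sketch (the class name is illustrative):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;

public class SchemaBuilderSketch {
    public static void main(String[] args) {
        // Equivalent to Schema.newBuilder().fromFields(new String[] {"i", "s"}, ...)
        Schema schema = Schema.newBuilder()
                .column("i", DataTypes.INT())
                .column("s", DataTypes.STRING())
                .build();
        // The columns stay unresolved until the schema is resolved against a catalog
        schema.getColumns().forEach(column -> System.out.println(column.getName()));
    }
}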

Example 2 with Schema

Use of org.apache.flink.table.api.Schema in project flink by apache.

Source: class PostgresCatalogTest, method testGetTable.

@Test
public void testGetTable() throws org.apache.flink.table.catalog.exceptions.TableNotExistException {
    // test postgres.public.t1
    Schema schema = getSimpleTable().schema;
    CatalogBaseTable table = catalog.getTable(new ObjectPath("postgres", TABLE1));
    assertEquals(schema, table.getUnresolvedSchema());
    table = catalog.getTable(new ObjectPath("postgres", "public.t1"));
    assertEquals(schema, table.getUnresolvedSchema());
    // test testdb.public.t2
    table = catalog.getTable(new ObjectPath(TEST_DB, TABLE2));
    assertEquals(schema, table.getUnresolvedSchema());
    table = catalog.getTable(new ObjectPath(TEST_DB, "public.t2"));
    assertEquals(schema, table.getUnresolvedSchema());
    // test testdb.testschema.t3
    table = catalog.getTable(new ObjectPath(TEST_DB, TEST_SCHEMA + ".t3"));
    assertEquals(schema, table.getUnresolvedSchema());
}
Also used: CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable), ObjectPath (org.apache.flink.table.catalog.ObjectPath), Schema (org.apache.flink.table.api.Schema), Test (org.junit.Test)
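
For context, the catalog exercised here is Postgres-backed. A minimal sketch of registering such a catalog through the JDBC connector's JdbcCatalog; the catalog name, database, credentials, and URL below are placeholders, and the constructor signature may differ across Flink versions:

import org.apache.flink.connector.jdbc.catalog.JdbcCatalog;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class PostgresCatalogSketch {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());
        // All connection parameters are placeholders; a running Postgres is assumed
        JdbcCatalog catalog = new JdbcCatalog(
                "mypg",                               // catalog name
                "postgres",                           // default database
                "username",                           // user
                "password",                           // password
                "jdbc:postgresql://localhost:5432");  // base URL without database
        tableEnv.registerCatalog("mypg", catalog);
        // Tables can then be resolved via ObjectPath lookups as in the test above
    }
}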

Example 3 with Schema

Use of org.apache.flink.table.api.Schema in project flink by apache.

Source: class CatalogPropertiesUtil, method deserializeCatalogTable.

/**
 * Deserializes the given map of string properties into an unresolved {@link CatalogTable}.
 */
public static CatalogTable deserializeCatalogTable(Map<String, String> properties) {
    try {
        final Schema schema = deserializeSchema(properties);
        @Nullable final String comment = properties.get(COMMENT);
        final List<String> partitionKeys = deserializePartitionKeys(properties);
        final Map<String, String> options = deserializeOptions(properties);
        return CatalogTable.of(schema, comment, partitionKeys, options);
    } catch (Exception e) {
        throw new CatalogException("Error in deserializing catalog table.", e);
    }
}
Also used: Schema (org.apache.flink.table.api.Schema), CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException), Nullable (javax.annotation.Nullable), TableException (org.apache.flink.table.api.TableException)
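
The inverse direction uses the same types visible in this snippet: an unresolved Schema plus comment, partition keys, and options are assembled into a CatalogTable via CatalogTable.of. A minimal sketch (the table contents are illustrative):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.catalog.CatalogTable;

public class CatalogTableSketch {
    public static void main(String[] args) {
        Schema schema = Schema.newBuilder()
                .column("id", DataTypes.BIGINT())
                .column("name", DataTypes.STRING())
                .build();
        Map<String, String> options = new HashMap<>();
        options.put("connector", "datagen"); // placeholder option
        CatalogTable table = CatalogTable.of(
                schema, "an example table", Collections.emptyList(), options);
        // The schema stays unresolved until the table is resolved against a catalog
        System.out.println(table.getUnresolvedSchema());
    }
}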

Example 4 with Schema

Use of org.apache.flink.table.api.Schema in project flink by apache.

Source: class SchemaTranslator, method createProducingResult.

/**
 * Converts the given {@link DataType} into the final {@link ProducingResult}.
 *
 * <p>This method serves three types of use cases:
 *
 * <ul>
 *   <li>1. Derive physical columns from the input schema.
 *   <li>2. Derive physical columns from the input schema but enrich with metadata column and
 *       primary key.
 *   <li>3. Entirely use declared schema.
 * </ul>
 */
public static ProducingResult createProducingResult(ResolvedSchema inputSchema, @Nullable Schema declaredSchema) {
    // the schema will be entirely derived from the input
    if (declaredSchema == null) {
        // go through data type to erase time attributes
        final DataType physicalDataType = inputSchema.toSourceRowDataType();
        final Schema schema = Schema.newBuilder().fromRowDataType(physicalDataType).build();
        return new ProducingResult(null, schema, null);
    }
    final List<UnresolvedColumn> declaredColumns = declaredSchema.getColumns();
    // the declared schema contains no physical columns;
    // thus, it only replaces physical columns with metadata rowtime or adds a primary key
    if (declaredColumns.stream().noneMatch(SchemaTranslator::isPhysical)) {
        // go through data type to erase time attributes
        final DataType sourceDataType = inputSchema.toSourceRowDataType();
        final DataType physicalDataType = patchDataTypeWithoutMetadataRowtime(sourceDataType, declaredColumns);
        final Schema.Builder builder = Schema.newBuilder();
        builder.fromRowDataType(physicalDataType);
        builder.fromSchema(declaredSchema);
        return new ProducingResult(null, builder.build(), null);
    }
    return new ProducingResult(null, declaredSchema, null);
}
Also used: UnresolvedColumn (org.apache.flink.table.api.Schema.UnresolvedColumn), Schema (org.apache.flink.table.api.Schema), DataType (org.apache.flink.table.types.DataType), AbstractDataType (org.apache.flink.table.types.AbstractDataType)
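
Use case 2 above corresponds to a declared schema that contains no physical columns, only enrichments. A hedged sketch of such a schema; the column name, metadata key, and primary key are illustrative:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;

public class DeclaredSchemaSketch {
    public static void main(String[] args) {
        // No physical columns here: createProducingResult(...) would merge this
        // declaration with the physical columns derived from the input schema.
        Schema declaredSchema = Schema.newBuilder()
                .columnByMetadata("rowtime", DataTypes.TIMESTAMP_LTZ(3))
                .primaryKey("id") // "id" is assumed to exist as a physical input column
                .build();
        System.out.println(declaredSchema);
    }
}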

Example 5 with Schema

Use of org.apache.flink.table.api.Schema in project flink by apache.

Source: class SchemaTranslator, method createProducingResult (overload taking a target data type).

/**
 * Converts the given {@link DataType} into the final {@link ProducingResult}.
 *
 * <p>This method serves one type of use case:
 *
 * <ul>
 *   <li>1. Derive physical columns from the input data type.
 * </ul>
 */
public static ProducingResult createProducingResult(DataTypeFactory dataTypeFactory, ResolvedSchema inputSchema, AbstractDataType<?> targetDataType) {
    final List<String> inputFieldNames = inputSchema.getColumnNames();
    final List<String> inputFieldNamesNormalized = inputFieldNames.stream()
            .map(n -> n.toLowerCase(Locale.ROOT))
            .collect(Collectors.toList());
    final DataType resolvedDataType = dataTypeFactory.createDataType(targetDataType);
    final List<String> targetFieldNames = flattenToNames(resolvedDataType);
    final List<String> targetFieldNamesNormalized = targetFieldNames.stream()
            .map(n -> n.toLowerCase(Locale.ROOT))
            .collect(Collectors.toList());
    final List<DataType> targetFieldDataTypes = flattenToDataTypes(resolvedDataType);
    // help reorder fields for POJOs if all field names are present but out of order;
    // otherwise, let the sink validation fail later
    List<String> projections = null;
    if (targetFieldNames.size() == inputFieldNames.size()) {
        if (targetFieldNames.containsAll(inputFieldNames)) {
            // reordering by name (case-sensitive)
            projections = targetFieldNames;
        } else if (targetFieldNamesNormalized.containsAll(inputFieldNamesNormalized)
                && targetFieldNamesNormalized.stream().distinct().count()
                        == targetFieldNames.size()
                && inputFieldNamesNormalized.stream().distinct().count()
                        == inputFieldNames.size()) {
            // reordering by name (case-insensitive), but fields must be unique
            projections = targetFieldNamesNormalized.stream()
                    .map(targetName -> {
                        final int inputFieldPos =
                                inputFieldNamesNormalized.indexOf(targetName);
                        return inputFieldNames.get(inputFieldPos);
                    })
                    .collect(Collectors.toList());
        }
    }
    final Schema schema = Schema.newBuilder().fromFields(targetFieldNames, targetFieldDataTypes).build();
    return new ProducingResult(projections, schema, resolvedDataType);
}
Also used: IntStream (java.util.stream.IntStream), DataType (org.apache.flink.table.types.DataType), Schema (org.apache.flink.table.api.Schema), StructuredType (org.apache.flink.table.types.logical.StructuredType), RowType (org.apache.flink.table.types.logical.RowType), DataTypeUtils.flattenToNames (org.apache.flink.table.types.utils.DataTypeUtils.flattenToNames), LogicalTypeFamily (org.apache.flink.table.types.logical.LogicalTypeFamily), Locale (java.util.Locale), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), UnresolvedPrimaryKey (org.apache.flink.table.api.Schema.UnresolvedPrimaryKey), LogicalTypeUtils (org.apache.flink.table.types.logical.utils.LogicalTypeUtils), TypeInfoDataTypeConverter (org.apache.flink.table.types.utils.TypeInfoDataTypeConverter), Nullable (javax.annotation.Nullable), Projection (org.apache.flink.table.connector.Projection), AbstractDataType (org.apache.flink.table.types.AbstractDataType), DataTypes (org.apache.flink.table.api.DataTypes), UnresolvedColumn (org.apache.flink.table.api.Schema.UnresolvedColumn), Collectors (java.util.stream.Collectors), List (java.util.List), LogicalType (org.apache.flink.table.types.logical.LogicalType), ValidationException (org.apache.flink.table.api.ValidationException), UnresolvedMetadataColumn (org.apache.flink.table.api.Schema.UnresolvedMetadataColumn), Optional (java.util.Optional), Internal (org.apache.flink.annotation.Internal), DataTypeUtils.flattenToDataTypes (org.apache.flink.table.types.utils.DataTypeUtils.flattenToDataTypes), UnresolvedPhysicalColumn (org.apache.flink.table.api.Schema.UnresolvedPhysicalColumn), LogicalTypeRoot (org.apache.flink.table.types.logical.LogicalTypeRoot), Collections (java.util.Collections), LogicalTypeChecks (org.apache.flink.table.types.logical.utils.LogicalTypeChecks)
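
The reordering rule in the middle of the method can be illustrated in isolation. A standalone sketch (the field names are made up) of how target names are mapped back to input names case-insensitively:

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

public class ReorderingSketch {
    public static void main(String[] args) {
        // e.g. reflection on a POJO may return fields in alphabetical order...
        List<String> inputFieldNames = Arrays.asList("age", "Name");
        // ...while the target type declares them in another order and case
        List<String> targetFieldNames = Arrays.asList("name", "AGE");

        List<String> inputNormalized = inputFieldNames.stream()
                .map(n -> n.toLowerCase(Locale.ROOT))
                .collect(Collectors.toList());

        // For each target name, find the input name that matches ignoring case
        List<String> projections = targetFieldNames.stream()
                .map(n -> n.toLowerCase(Locale.ROOT))
                .map(targetName -> inputFieldNames.get(inputNormalized.indexOf(targetName)))
                .collect(Collectors.toList());

        System.out.println(projections); // prints [Name, age]
    }
}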

Aggregations

Schema (org.apache.flink.table.api.Schema): 14 usages
DataType (org.apache.flink.table.types.DataType): 8 usages
Test (org.junit.Test): 7 usages
Collections (java.util.Collections): 5 usages
List (java.util.List): 5 usages
ArrayList (java.util.ArrayList): 4 usages
Collectors (java.util.stream.Collectors): 4 usages
DataTypes (org.apache.flink.table.api.DataTypes): 4 usages
ValidationException (org.apache.flink.table.api.ValidationException): 4 usages
AbstractDataType (org.apache.flink.table.types.AbstractDataType): 4 usages
Arrays (java.util.Arrays): 3 usages
UnresolvedColumn (org.apache.flink.table.api.Schema.UnresolvedColumn): 3 usages
Row (org.apache.flink.types.Row): 3 usages
ZoneId (java.time.ZoneId): 2 usages
HashMap (java.util.HashMap): 2 usages
Map (java.util.Map): 2 usages
Set (java.util.Set): 2 usages
Nullable (javax.annotation.Nullable): 2 usages
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 2 usages
ExplainDetail (org.apache.flink.table.api.ExplainDetail): 2 usages