Example 1 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in the Apache Flink project, from the class HiveLookupTableSource, method getLookupFunction.

private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // copy to a final local so the fetcher lambdas below capture only the path, not `this`
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the given
        // table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(
                    context.getPartition(new ArrayList<>())
                            .orElseThrow(() -> new IllegalArgumentException(String.format(
                                    "Fetch partition fail for hive table %s.", tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of the
        // given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            // fetch latest partitions for partitioned table
            if (comparablePartitionValues.size() > 0) {
                // sort in desc order
                comparablePartitionValues.sort(
                        (o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition =
                        comparablePartitionValues.get(0);
                partValueList.add(
                        context.getPartition((List<String>) maxPartition.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.", tableFullPath))));
            } else {
                throw new IllegalArgumentException(String.format(
                        "At least one partition is required when set '%s' to 'latest' in temporal join,"
                                + " but actual partition number is '%s' for hive table %s",
                        STREAMING_SOURCE_PARTITION_INCLUDE.key(),
                        comparablePartitionValues.size(),
                        tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue :
                    comparablePartitionValues) {
                partValueList.add(
                        context.getPartition((List<String>) comparablePartitionValue.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.", tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
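
All three branches implement the same single-method PartitionFetcher contract: given a fetcher context, return the HiveTablePartitions to read. As a hedged sketch only, the streaming-read lambda above could equally be written as a named class (LatestPartitionFetcher is an illustrative name, not a Flink class):

import java.util.Collections;
import java.util.List;
import org.apache.flink.connector.file.table.PartitionFetcher;
import org.apache.flink.connectors.hive.HiveTablePartition;

// Illustrative equivalent of the streaming-read branch: pick the partition
// with the largest comparator value, i.e. the latest partition.
class LatestPartitionFetcher implements PartitionFetcher<HiveTablePartition> {

    @Override
    @SuppressWarnings("unchecked")
    public List<HiveTablePartition> fetch(Context<HiveTablePartition> context) throws Exception {
        List<PartitionFetcher.Context.ComparablePartitionValue> values =
                context.getComparablePartitionValueList();
        if (values.isEmpty()) {
            throw new IllegalArgumentException("At least one partition is required.");
        }
        // Descending sort, as in the original: the first element is the latest.
        values.sort((o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
        List<String> latest = (List<String>) values.get(0).getPartitionValue();
        return Collections.singletonList(
                context.getPartition(latest)
                        .orElseThrow(() -> new IllegalArgumentException(
                                "Partition " + latest + " not found.")));
    }
}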
Also used : HivePartitionUtils(org.apache.flink.connectors.hive.util.HivePartitionUtils) TableFunction(org.apache.flink.table.functions.TableFunction) PartitionReader(org.apache.flink.connector.file.table.PartitionReader) DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) LoggerFactory(org.slf4j.LoggerFactory) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveInputFormatPartitionReader(org.apache.flink.connectors.hive.read.HiveInputFormatPartitionReader) JobConfUtils(org.apache.flink.connectors.hive.util.JobConfUtils) RowType(org.apache.flink.table.types.logical.RowType) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) ArrayList(java.util.ArrayList) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) ReadableConfig(org.apache.flink.configuration.ReadableConfig) Duration(java.time.Duration) HivePartitionFetcherContextBase(org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) STREAMING_SOURCE_CONSUME_START_OFFSET(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Configuration(org.apache.flink.configuration.Configuration) Preconditions(org.apache.flink.util.Preconditions) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) JobConf(org.apache.hadoop.mapred.JobConf) LOOKUP_JOIN_CACHE_TTL(org.apache.flink.connectors.hive.HiveOptions.LOOKUP_JOIN_CACHE_TTL) List(java.util.List) Optional(java.util.Optional) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)

Example 2 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in the Apache Flink project, from the class PartitionMonitorTest, method preparePartitionMonitor.

private void preparePartitionMonitor() {
    List<List<String>> seenPartitionsSinceOffset = new ArrayList<>();
    JobConf jobConf = new JobConf();
    Configuration configuration = new Configuration();
    ObjectPath tablePath = new ObjectPath("testDb", "testTable");
    configuration.setString("streaming-source.consume-order", "create-time");
    HiveContinuousPartitionContext<Partition, Long> fetcherContext = new HiveContinuousPartitionContext<Partition, Long>() {

        @Override
        public HiveTablePartition toHiveTablePartition(Partition partition) {
            StorageDescriptor sd = partition.getSd();
            Map<String, String> partitionColValues = new HashMap<>();
            for (String partCol : partition.getValues()) {
                String[] arr = partCol.split("=");
                Asserts.check(arr.length == 2, "partition string should be key=value format");
                partitionColValues.put(arr[0], arr[1]);
            }
            return new HiveTablePartition(sd, partitionColValues, new Properties());
        }

        @Override
        public ObjectPath getTablePath() {
            return null;
        }

        @Override
        public TypeSerializer<Long> getTypeSerializer() {
            return null;
        }

        @Override
        public Long getConsumeStartOffset() {
            return null;
        }

        @Override
        public void open() throws Exception {
        }

        @Override
        public Optional<Partition> getPartition(List<String> partValues) throws Exception {
            return Optional.empty();
        }

        @Override
        public List<ComparablePartitionValue> getComparablePartitionValueList() throws Exception {
            return null;
        }

        @Override
        public void close() throws Exception {
        }
    };
    ContinuousPartitionFetcher<Partition, Long> continuousPartitionFetcher = new ContinuousPartitionFetcher<Partition, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public List<Tuple2<Partition, Long>> fetchPartitions(Context<Partition, Long> context, Long previousOffset) throws Exception {
            return testPartitionWithOffset.stream()
                    .filter(p -> (long) p.getCreateTime() >= previousOffset)
                    .map(p -> Tuple2.of(p, (long) p.getCreateTime()))
                    .collect(Collectors.toList());
        }

        @Override
        public List<Partition> fetch(PartitionFetcher.Context<Partition> context) throws Exception {
            return null;
        }
    };
    partitionMonitor = new ContinuousHiveSplitEnumerator.PartitionMonitor<>(
            0L,
            seenPartitionsSinceOffset,
            tablePath,
            configuration,
            jobConf,
            continuousPartitionFetcher,
            fetcherContext);
}
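
The interesting piece of this stub is fetchPartitions: it replays only the partitions whose create time is at or beyond the previous offset, which is how the monitor discovers partitions incrementally. A minimal, self-contained sketch of that filter, with create times modeled as plain longs instead of metastore Partition objects (purely illustrative):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

// Illustrative only: the offset predicate from fetchPartitions above.
class OffsetFilterSketch {

    static List<Long> partitionsSince(List<Long> createTimes, long previousOffset) {
        return createTimes.stream()
                .filter(t -> t >= previousOffset) // same predicate as the test fetcher
                .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        // Partitions created at times 1, 5, 9; an offset of 5 keeps 5 and 9.
        System.out.println(partitionsSince(Arrays.asList(1L, 5L, 9L), 5L)); // [5, 9]
    }
}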
Also used : Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) Properties(java.util.Properties) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Assert.assertTrue(org.junit.Assert.assertTrue) Asserts(org.apache.http.util.Asserts) Test(org.junit.Test) HashMap(java.util.HashMap) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) JobConf(org.apache.hadoop.mapred.JobConf) HiveContinuousPartitionContext(org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext) List(java.util.List) ContinuousPartitionFetcher(org.apache.flink.connector.file.table.ContinuousPartitionFetcher) Map(java.util.Map) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) Optional(java.util.Optional) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Collections(java.util.Collections)

Example 3 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in the Apache Flink project, from the class HivePartitionFetcherTest, method testIgnoreNonExistPartition.

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // A partition path may exist while the partition has not been added to HMS yet, e.g. the
    // partition is still being loaded, or the path is simply misplaced.
    // Make sure the fetcher can ignore such paths.
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
            flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
            flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
            flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
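
The misplaced path in this test follows Hive's key=value partition directory convention ("date=2021-06-18"). A small helper, hypothetical and not part of Flink or Hive, that renders a partition spec in that format shows the layout the fetcher has to tolerate (note that real Hive additionally escapes special characters in names and values):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;

// Hypothetical helper: render a partition spec as a Hive-style path suffix.
class PartitionPathSketch {

    static String toPathSuffix(Map<String, String> spec) {
        return spec.entrySet().stream()
                .map(e -> e.getKey() + "=" + e.getValue())
                .collect(Collectors.joining("/"));
    }

    public static void main(String[] args) {
        Map<String, String> spec = new LinkedHashMap<>();
        spec.put("date", "2021-06-18");
        System.out.println(toPathSuffix(spec)); // date=2021-06-18
    }
}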
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Path(org.apache.hadoop.fs.Path) HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.hadoop.hive.metastore.api.Table) JobConfWrapper(org.apache.flink.connectors.hive.JobConfWrapper) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) FileSystem(org.apache.hadoop.fs.FileSystem) DataType(org.apache.flink.table.types.DataType) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 4 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in the Apache Flink project, from the class HiveCatalogMetadataTestBase, method testCreateFunctionCaseInsensitive.

// ------ functions ------
@Test
public void testCreateFunctionCaseInsensitive() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    String functionName = "myUdf";
    ObjectPath functionPath = new ObjectPath(db1, functionName);
    catalog.createFunction(functionPath, createFunction(), false);
    // make sure we can get the function
    catalog.getFunction(functionPath);
    catalog.dropFunction(functionPath, false);
}
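
ObjectPath itself is just a pair of database name and object name; functions, tables, and views all share it. A quick usage sketch of its accessors (these are existing methods on org.apache.flink.table.catalog.ObjectPath):

import org.apache.flink.table.catalog.ObjectPath;

public class ObjectPathSketch {

    public static void main(String[] args) {
        ObjectPath path = new ObjectPath("db1", "myUdf");
        System.out.println(path.getDatabaseName()); // db1
        System.out.println(path.getObjectName());   // myUdf
        System.out.println(path.getFullName());     // db1.myUdf
        // The dotted form parses back into an equal ObjectPath.
        System.out.println(ObjectPath.fromString("db1.myUdf").equals(path)); // true
    }
}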
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) Test(org.junit.Test)

Example 5 with ObjectPath

Use of org.apache.flink.table.catalog.ObjectPath in the Apache Flink project, from the class HiveCatalogITCase, method testTableWithPrimaryKey.

@Test
public void testTableWithPrimaryKey() {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
    tableEnv.getConfig().getConfiguration().setInteger(TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    tableEnv.registerCatalog("catalog1", hiveCatalog);
    tableEnv.useCatalog("catalog1");
    final String createTable =
            "CREATE TABLE pk_src (\n"
                    + "  uuid varchar(40) not null,\n"
                    + "  price DECIMAL(10, 2),\n"
                    + "  currency STRING,\n"
                    + "  ts6 TIMESTAMP(6),\n"
                    + "  ts AS CAST(ts6 AS TIMESTAMP(3)),\n"
                    + "  WATERMARK FOR ts AS ts,\n"
                    + "  constraint ct1 PRIMARY KEY(uuid) NOT ENFORCED)\n"
                    + "  WITH (\n"
                    + "    'connector.type' = 'filesystem',"
                    + "    'connector.path' = 'file://fakePath',"
                    + "    'format.type' = 'csv')";
    tableEnv.executeSql(createTable);
    TableSchema tableSchema = tableEnv.getCatalog(tableEnv.getCurrentCatalog()).map(catalog -> {
        try {
            final ObjectPath tablePath = ObjectPath.fromString(catalog.getDefaultDatabase() + '.' + "pk_src");
            return catalog.getTable(tablePath).getSchema();
        } catch (TableNotExistException e) {
            return null;
        }
    }).orElse(null);
    assertThat(tableSchema).isNotNull();
    assertThat(tableSchema.getPrimaryKey())
            .hasValue(UniqueConstraint.primaryKey("ct1", Collections.singletonList("uuid")));
    tableEnv.executeSql("DROP TABLE pk_src");
}
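
The assertion at the end reads the primary key back through the catalog. As a hedged sketch of the same lookup without the Optional chaining (assuming an opened Catalog that contains the pk_src table from the test):

import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.catalog.ObjectPath;

// Sketch: fetch a table's schema and print its primary key, if any.
class PrimaryKeySketch {

    static void printPrimaryKey(Catalog catalog) throws Exception {
        ObjectPath tablePath = new ObjectPath(catalog.getDefaultDatabase(), "pk_src");
        TableSchema schema = catalog.getTable(tablePath).getSchema();
        schema.getPrimaryKey().ifPresent(pk ->
                System.out.println(pk.getName() + " -> " + pk.getColumns()));
    }
}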
Also used : Arrays(java.util.Arrays) Schema(org.apache.flink.table.api.Schema) FileUtils(org.apache.flink.util.FileUtils) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) CatalogTable(org.apache.flink.table.catalog.CatalogTable) FLINK_PROPERTY_PREFIX(org.apache.flink.table.catalog.CatalogPropertiesUtil.FLINK_PROPERTY_PREFIX) Future(java.util.concurrent.Future) Map(java.util.Map) URI(java.net.URI) Path(java.nio.file.Path) TableEnvironment(org.apache.flink.table.api.TableEnvironment) AfterClass(org.junit.AfterClass) Expressions.$(org.apache.flink.table.api.Expressions.$) TableSchema(org.apache.flink.table.api.TableSchema) Table(org.apache.flink.table.api.Table) TestCollectionTableFactory(org.apache.flink.table.planner.factories.utils.TestCollectionTableFactory) Executors(java.util.concurrent.Executors) List(java.util.List) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) ManagedTableFactory(org.apache.flink.table.factories.ManagedTableFactory) Row(org.apache.flink.types.Row) UniqueConstraint(org.apache.flink.table.api.constraints.UniqueConstraint) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) BeforeClass(org.junit.BeforeClass) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM(org.apache.flink.table.api.config.ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) ObjectPath(org.apache.flink.table.catalog.ObjectPath) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) CatalogView(org.apache.flink.table.catalog.CatalogView) Catalog(org.apache.flink.table.catalog.Catalog) TestManagedTableFactory(org.apache.flink.table.factories.TestManagedTableFactory) ExecutorService(java.util.concurrent.ExecutorService) AbstractDataType(org.apache.flink.table.types.AbstractDataType) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) PrintStream(java.io.PrintStream) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) Files(java.nio.file.Files) Configuration(org.apache.flink.configuration.Configuration) DataTypes(org.apache.flink.table.api.DataTypes) Test(org.junit.Test) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) CollectionUtil(org.apache.flink.util.CollectionUtil) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) CONNECTOR(org.apache.flink.table.factories.FactoryUtil.CONNECTOR) Rule(org.junit.Rule) CoreOptions(org.apache.flink.configuration.CoreOptions) Paths(java.nio.file.Paths) SqlDialect(org.apache.flink.table.api.SqlDialect) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Comparator(java.util.Comparator) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)

Aggregations

ObjectPath (org.apache.flink.table.catalog.ObjectPath): 81 uses
Test (org.junit.Test): 52 uses
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 32 uses
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 29 uses
HashMap (java.util.HashMap): 21 uses
CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 20 uses
TableSchema (org.apache.flink.table.api.TableSchema): 19 uses
TableEnvironment (org.apache.flink.table.api.TableEnvironment): 17 uses
CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec): 12 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 12 uses
Configuration (org.apache.flink.configuration.Configuration): 11 uses
SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable): 11 uses
TableNotExistException (org.apache.flink.table.catalog.exceptions.TableNotExistException): 9 uses
ArrayList (java.util.ArrayList): 8 uses
Map (java.util.Map): 8 uses
GenericInMemoryCatalog (org.apache.flink.table.catalog.GenericInMemoryCatalog): 8 uses
LinkedHashMap (java.util.LinkedHashMap): 7 uses
Catalog (org.apache.flink.table.catalog.Catalog): 7 uses
ContextResolvedTable (org.apache.flink.table.catalog.ContextResolvedTable): 6 uses
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 6 uses