
Example 1 with HiveCatalog

Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.

From the class HiveParser, method parse:

@Override
public List<Operation> parse(String statement) {
    CatalogManager catalogManager = getCatalogManager();
    Catalog currentCatalog = catalogManager.getCatalog(catalogManager.getCurrentCatalog()).orElse(null);
    if (!(currentCatalog instanceof HiveCatalog)) {
        LOG.warn("Current catalog is not HiveCatalog. Falling back to Flink's planner.");
        return super.parse(statement);
    }
    HiveConf hiveConf = new HiveConf(((HiveCatalog) currentCatalog).getHiveConf());
    // nonstrict mode allows inserts where every partition column is dynamic
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    // don't let Hive lazily load UDFs from the metastore
    hiveConf.set("hive.allow.udf.load.on.demand", "false");
    // only Hive's front end is reused here; pin the engine to plain MapReduce
    hiveConf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(((HiveCatalog) currentCatalog).getHiveVersion());
    try {
        // creates SessionState
        startSessionState(hiveConf, catalogManager);
        // We override Hive's grouping function. Refer to the implementation for more details.
        hiveShim.registerTemporaryFunction("grouping", HiveGenericUDFGrouping.class);
        return processCmd(statement, hiveConf, hiveShim, (HiveCatalog) currentCatalog);
    } finally {
        clearSessionState();
    }
}
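
HiveParser only takes over when the session's current catalog is a HiveCatalog; otherwise it falls back to Flink's default parser, as the guard above shows. A minimal sketch of how that precondition is usually established follows — the catalog name, hive-conf directory, and version string are illustrative placeholders, not values from the snippet:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.catalog.hive.HiveCatalog;

public class HiveDialectSetup {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inBatchMode().build());
        // Placeholder values: point the conf dir at a directory containing your hive-site.xml.
        HiveCatalog hiveCatalog = new HiveCatalog("myhive", "default", "/opt/hive-conf", "2.3.9");
        tEnv.registerCatalog("myhive", hiveCatalog);
        // Making the HiveCatalog current is what routes statements to HiveParser
        // instead of Flink's own parser.
        tEnv.useCatalog("myhive");
        tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    }
}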

Example 2 with HiveCatalog

Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.

From the class HivePartitionFetcherTest, method testIgnoreNonExistPartition:

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // A partition path may exist on the filesystem while the partition was never
    // added to HMS, e.g. because the partition is still being loaded or the path
    // is simply misplaced. Make sure the fetcher ignores such paths.
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
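
By contrast, once the partition is registered in the metastore the fetcher does report it. A hedged sketch of registering the same path as a real partition through the Hive dialect, reusing the names from the test above (whether you route this through SQL, as here, or through Catalog#createPartition is a matter of taste):

    // Sketch: after this, fetcherContext.getComparablePartitionValueList()
    // would no longer be empty, since the path is now backed by an HMS partition.
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    tableEnv.executeSql("alter table `default`.`test` add partition (`date`='2021-06-18')");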

Example 3 with HiveCatalog

Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.

From the class HiveLookupJoinITCase, method testPartitionFetcherAndReader:

@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partitions
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')," + "(3,'c',33,2020,'09','31')").await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer = InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
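
The single fetched partition is a consequence of the table's streaming-source options rather than of the fetcher itself. A hedged sketch of DDL that configures a Hive table this way — the column names are illustrative, but the option keys correspond to the STREAMING_SOURCE_* constants imported by the test:

    // 'partition-name' ordering plus 'latest' inclusion is what makes
    // fetcher.fetch(context) return exactly one partition above.
    batchEnv.executeSql(
            "create table partition_table (x int, y string, z int)"
                    + " partitioned by (pt_year int, pt_mon string, pt_day string)"
                    + " tblproperties ("
                    + "  'streaming-source.enable' = 'true',"
                    + "  'streaming-source.partition.include' = 'latest',"
                    + "  'streaming-source.monitor-interval' = '1 h',"
                    + "  'streaming-source.partition-order' = 'partition-name')");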

Example 4 with HiveCatalog

Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.

From the class HiveTableSinkITCase, method testBatchAppend:

@Test
public void testBatchAppend() throws Exception {
    TableEnvironment tEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tEnv.useCatalog(hiveCatalog.getName());
    tEnv.executeSql("create database db1");
    tEnv.useDatabase("db1");
    try {
        tEnv.executeSql("create table append_table (i int, j int)");
        tEnv.executeSql("insert into append_table select 1, 1").await();
        tEnv.executeSql("insert into append_table select 2, 2").await();
        List<Row> rows = CollectionUtil.iteratorToList(tEnv.executeSql("select * from append_table").collect());
        rows.sort(Comparator.comparingInt(o -> (int) o.getField(0)));
        Assert.assertEquals(Arrays.asList(Row.of(1, 1), Row.of(2, 2)), rows);
    } finally {
        tEnv.executeSql("drop database db1 cascade");
    }
}
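
Note that executeSql only submits the job; the await() calls are what serialize the two inserts before the SELECT runs. The same append can also be expressed with the Table API — a minimal sketch, assuming the db1.append_table created in the test:

    // Equivalent Table API append; await() blocks until the batch job finishes,
    // just like the executeSql(...).await() calls above.
    tEnv.fromValues(Expressions.row(3, 3), Expressions.row(4, 4))
            .executeInsert("append_table")
            .await();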

Example 5 with HiveCatalog

Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.

From the class HiveInputFormatPartitionReaderITCase, method testReadMultipleSplits:

@Test
public void testReadMultipleSplits() throws Exception {
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    if (!HiveShimLoader.getHiveVersion().startsWith("2.0")) {
        testReadFormat(tableEnv, hiveCatalog, "orc");
    }
    testReadFormat(tableEnv, hiveCatalog, "parquet");
}
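
The testReadFormat helper is not shown in this excerpt. A plausible sketch of what such a helper does — create a table stored as the given format, insert rows, and read them back — with the body hypothetical; Flink's actual test presumably reads the data through HiveInputFormatPartitionReader rather than plain collect():

// Hypothetical sketch of the omitted helper; the real body in Flink's test may differ.
private static void testReadFormat(
        TableEnvironment tableEnv, HiveCatalog hiveCatalog, String format) throws Exception {
    String tableName = "test_" + format;
    tableEnv.executeSql(
            String.format("create table %s (i int, s string) stored as %s", tableName, format));
    tableEnv.executeSql(String.format("insert into %s values (1,'a'),(2,'b')", tableName)).await();
    List<Row> rows =
            CollectionUtil.iteratorToList(
                    tableEnv.executeSql("select * from " + tableName).collect());
    assertEquals(2, rows.size());
}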
