Search in sources :

Example 1 with PartitionOrder

Use of org.apache.flink.connectors.hive.HiveOptions.PartitionOrder in the Apache Flink project.

In class HivePartitionFetcherContextBase, method getComparablePartitionValueList.

/**
 * Builds one {@link ComparablePartitionValue} per partition, using the comparison key selected
 * by {@code partitionOrder}.
 *
 * <ul>
 *   <li>{@code PARTITION_NAME}: every listed partition name is wrapped via {@code
 *       getComparablePartitionByName}.
 *   <li>{@code CREATE_TIME}: create times are cached in {@code partValuesToCreateTime}. Only
 *       partitions whose values are not cached yet are fetched from the metastore; the result
 *       is then built from every entry currently held in the cache.
 *   <li>{@code PARTITION_TIME}: the time is extracted from the partition values by {@code
 *       extractor} and converted with {@code toMills}.
 * </ul>
 *
 * @return a new list holding the comparable values; empty if nothing was produced
 * @throws UnsupportedOperationException if {@code partitionOrder} matches none of the cases
 * @throws Exception whatever the metastore client or the helper methods throw
 */
@Override
public List<ComparablePartitionValue> getComparablePartitionValueList() throws Exception {
    List<ComparablePartitionValue> partitionValueList = new ArrayList<>();
    switch (partitionOrder) {
        case PARTITION_NAME:
            // NOTE(review): Short.MAX_VALUE is presumably the max-partitions argument of the
            // metastore call - confirm against the client wrapper.
            List<String> partitionNames =
                    metaStoreClient.listPartitionNames(
                            tablePath.getDatabaseName(),
                            tablePath.getObjectName(),
                            Short.MAX_VALUE);
            for (String partitionName : partitionNames) {
                partitionValueList.add(getComparablePartitionByName(partitionName));
            }
            break;
        case CREATE_TIME:
            partitionNames =
                    metaStoreClient.listPartitionNames(
                            tablePath.getDatabaseName(),
                            tablePath.getObjectName(),
                            Short.MAX_VALUE);
            // Only partitions that are not cached yet need a metastore lookup.
            List<String> newNames =
                    partitionNames.stream()
                            .filter(
                                    n ->
                                            !partValuesToCreateTime.containsKey(
                                                    extractPartitionValues(n)))
                            .collect(Collectors.toList());
            // Skip the metastore round trip when there is nothing new to fetch.
            if (!newNames.isEmpty()) {
                List<Partition> newPartitions =
                        metaStoreClient.getPartitionsByNames(
                                tablePath.getDatabaseName(),
                                tablePath.getObjectName(),
                                newNames);
                for (Partition partition : newPartitions) {
                    partValuesToCreateTime.put(
                            partition.getValues(), getPartitionCreateTime(partition));
                }
            }
            // Iterate entries directly instead of looking every key up again.
            for (Map.Entry<List<String>, Long> cached : partValuesToCreateTime.entrySet()) {
                partitionValueList.add(
                        getComparablePartitionByTime(cached.getKey(), cached.getValue()));
            }
            break;
        case PARTITION_TIME:
            partitionNames =
                    metaStoreClient.listPartitionNames(
                            tablePath.getDatabaseName(),
                            tablePath.getObjectName(),
                            Short.MAX_VALUE);
            for (String partitionName : partitionNames) {
                List<String> partValues = extractPartitionValues(partitionName);
                Long partitionTime = toMills(extractor.extract(partitionKeys, partValues));
                partitionValueList.add(getComparablePartitionByTime(partValues, partitionTime));
            }
            break;
        default:
            throw new UnsupportedOperationException("Unsupported partition order: " + partitionOrder);
    }
    return partitionValueList;
}
Also used : DataType(org.apache.flink.table.types.DataType) PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN) PARTITION_TIME_EXTRACTOR_KIND(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND) FileSystem(org.apache.hadoop.fs.FileSystem) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) JobConfWrapper(org.apache.flink.connectors.hive.JobConfWrapper) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) ArrayList(java.util.ArrayList) PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER) PARTITION_TIME_EXTRACTOR_CLASS(org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_CLASS) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) PartitionPathUtils(org.apache.flink.table.utils.PartitionPathUtils) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) STREAMING_SOURCE_PARTITION_ORDER(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER) HiveMetastoreClientWrapper(org.apache.flink.table.catalog.hive.client.HiveMetastoreClientWrapper) HiveConfUtils(org.apache.flink.connectors.hive.util.HiveConfUtils) DefaultPartTimeExtractor.toMills(org.apache.flink.connector.file.table.DefaultPartTimeExtractor.toMills) Properties(java.util.Properties) TimestampData(org.apache.flink.table.data.TimestampData) Timestamp(java.sql.Timestamp) Configuration(org.apache.flink.configuration.Configuration) HiveTablePartition(org.apache.flink.connectors.hive.HiveTablePartition) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) 
PartitionTimeExtractor(org.apache.flink.connector.file.table.PartitionTimeExtractor) List(java.util.List) HiveReflectionUtils(org.apache.flink.table.catalog.hive.util.HiveReflectionUtils) PartitionOrder(org.apache.flink.connectors.hive.HiveOptions.PartitionOrder) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveTablePartition(org.apache.flink.connectors.hive.HiveTablePartition) ArrayList(java.util.ArrayList)

Example 2 with PartitionOrder

Use of org.apache.flink.connectors.hive.HiveOptions.PartitionOrder in the Apache Flink project.

In class HiveDynamicTableFactoryTest, method testHiveStreamingSourceOptions.

/**
 * Checks how the streaming-source table properties are reflected in the created table source:
 * the default partition order, the rejection of {@code 'latest'} partition include for a plain
 * table scan, an explicit {@code 'partition-name'} order, and the deprecated {@code
 * 'streaming-source.consume-order'} key.
 */
@Test
public void testHiveStreamingSourceOptions() throws Exception {
    // test default hive streaming-source is not a lookup source
    tableEnv.executeSql(String.format("create table table1 (x int, y string, z int) partitioned by (" + " pt_year int, pt_mon string, pt_day string)" + " tblproperties ('%s' = 'true')", STREAMING_SOURCE_ENABLE.key()));
    DynamicTableSource tableSource1 = getTableSource("table1");
    assertFalse(tableSource1 instanceof HiveLookupTableSource);
    HiveTableSource tableSource = (HiveTableSource) tableSource1;
    Configuration configuration = new Configuration();
    tableSource.catalogTable.getOptions().forEach(configuration::setString);
    assertEquals(HiveOptions.PartitionOrder.PARTITION_NAME, configuration.get(STREAMING_SOURCE_PARTITION_ORDER));
    // test table can't be selected when set 'streaming-source.partition.include' to 'latest'
    tableEnv.executeSql(String.format("create table table2 (x int, y string, z int) partitioned by (" + " pt_year int, pt_mon string, pt_day string)" + " tblproperties ('%s' = 'true', '%s' = 'latest')", STREAMING_SOURCE_ENABLE.key(), STREAMING_SOURCE_PARTITION_INCLUDE.key()));
    DynamicTableSource tableSource2 = getTableSource("table2");
    assertTrue(tableSource2 instanceof HiveLookupTableSource);
    // Track the failure with a flag checked after the try block: asserting inside the try
    // would be swallowed by the catch, and without any check the test would pass silently
    // if the query unexpectedly succeeded.
    boolean selectFailed = false;
    try {
        tableEnv.executeSql("select * from table2");
    } catch (Throwable t) {
        selectFailed = true;
        assertTrue(ExceptionUtils.findThrowableWithMessage(t, "The only supported 'streaming-source.partition.include' is 'all' in" + " hive table scan, but is 'latest'").isPresent());
    }
    // selecting from table2 must have been rejected
    assertTrue(selectFailed);
    // test table support 'partition-name' in option 'streaming-source.partition.order'.
    tableEnv.executeSql(String.format("create table table3 (x int, y string, z int) partitioned by (" + " pt_year int, pt_mon string, pt_day string)" + " tblproperties ('%s' = 'true', '%s' = 'partition-name')", STREAMING_SOURCE_ENABLE.key(), STREAMING_SOURCE_PARTITION_ORDER.key()));
    DynamicTableSource tableSource3 = getTableSource("table3");
    assertTrue(tableSource3 instanceof HiveTableSource);
    HiveTableSource hiveTableSource3 = (HiveTableSource) tableSource3;
    Configuration configuration1 = new Configuration();
    hiveTableSource3.catalogTable.getOptions().forEach(configuration1::setString);
    PartitionOrder partitionOrder1 = configuration1.get(STREAMING_SOURCE_PARTITION_ORDER);
    assertEquals(HiveOptions.PartitionOrder.PARTITION_NAME, partitionOrder1);
    // test deprecated option key 'streaming-source.consume-order' and new key
    // 'streaming-source.partition.order'
    tableEnv.executeSql(String.format("create table table4 (x int, y string, z int) partitioned by (" + " pt_year int, pt_mon string, pt_day string)" + " tblproperties ('%s' = 'true', '%s' = 'partition-time')", STREAMING_SOURCE_ENABLE.key(), "streaming-source.consume-order"));
    DynamicTableSource tableSource4 = getTableSource("table4");
    assertTrue(tableSource4 instanceof HiveTableSource);
    HiveTableSource hiveTableSource4 = (HiveTableSource) tableSource4;
    Configuration configuration2 = new Configuration();
    hiveTableSource4.catalogTable.getOptions().forEach(configuration2::setString);
    PartitionOrder partitionOrder2 = configuration2.get(STREAMING_SOURCE_PARTITION_ORDER);
    assertEquals(HiveOptions.PartitionOrder.PARTITION_TIME, partitionOrder2);
}
Also used : PartitionOrder(org.apache.flink.connectors.hive.HiveOptions.PartitionOrder) Configuration(org.apache.flink.configuration.Configuration) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) Test(org.junit.Test)

Aggregations

Configuration (org.apache.flink.configuration.Configuration)2 PartitionOrder (org.apache.flink.connectors.hive.HiveOptions.PartitionOrder)2 IOException (java.io.IOException)1 Timestamp (java.sql.Timestamp)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Properties (java.util.Properties)1 Collectors (java.util.stream.Collectors)1 DefaultPartTimeExtractor.toMills (org.apache.flink.connector.file.table.DefaultPartTimeExtractor.toMills)1 PARTITION_TIME_EXTRACTOR_CLASS (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_CLASS)1 PARTITION_TIME_EXTRACTOR_KIND (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND)1 PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER)1 PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN)1 PartitionTimeExtractor (org.apache.flink.connector.file.table.PartitionTimeExtractor)1 STREAMING_SOURCE_PARTITION_ORDER (org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER)1 HiveTablePartition (org.apache.flink.connectors.hive.HiveTablePartition)1 JobConfWrapper (org.apache.flink.connectors.hive.JobConfWrapper)1 HiveConfUtils (org.apache.flink.connectors.hive.util.HiveConfUtils)1