
Example 1 with HiveContinuousPartitionContext

Use of org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext in project flink by apache.

From the class PartitionMonitorTest, the method preparePartitionMonitor:

private void preparePartitionMonitor() {
    List<List<String>> seenPartitionsSinceOffset = new ArrayList<>();
    JobConf jobConf = new JobConf();
    Configuration configuration = new Configuration();
    ObjectPath tablePath = new ObjectPath("testDb", "testTable");
    // Consume partitions in the order of their create time in the metastore.
    configuration.setString("streaming-source.consume-order", "create-time");
    // Stub context: only toHiveTablePartition matters to the monitor in this test;
    // the remaining methods are intentionally no-ops.
    HiveContinuousPartitionContext<Partition, Long> fetcherContext = new HiveContinuousPartitionContext<Partition, Long>() {

        @Override
        public HiveTablePartition toHiveTablePartition(Partition partition) {
            StorageDescriptor sd = partition.getSd();
            Map<String, String> partitionColValues = new HashMap<>();
            // Each partition value is expected in "key=value" form; split it into
            // the partition column name and its value.
            for (String partCol : partition.getValues()) {
                String[] arr = partCol.split("=");
                Asserts.check(arr.length == 2, "partition string should be key=value format");
                partitionColValues.put(arr[0], arr[1]);
            }
            return new HiveTablePartition(sd, partitionColValues, new Properties());
        }

        @Override
        public ObjectPath getTablePath() {
            return null;
        }

        @Override
        public TypeSerializer<Long> getTypeSerializer() {
            return null;
        }

        @Override
        public Long getConsumeStartOffset() {
            return null;
        }

        @Override
        public void open() throws Exception {
        }

        @Override
        public Optional<Partition> getPartition(List<String> partValues) throws Exception {
            return Optional.empty();
        }

        @Override
        public List<ComparablePartitionValue> getComparablePartitionValueList() throws Exception {
            return null;
        }

        @Override
        public void close() throws Exception {
        }
    };
    // Fetcher backed by the test class's fixture list testPartitionWithOffset.
    ContinuousPartitionFetcher<Partition, Long> continuousPartitionFetcher = new ContinuousPartitionFetcher<Partition, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public List<Tuple2<Partition, Long>> fetchPartitions(Context<Partition, Long> context, Long previousOffset) throws Exception {
            // Serve every fixture partition created at or after the previous offset,
            // pairing each with its create time, which becomes the next offset.
            return testPartitionWithOffset.stream()
                    .filter(p -> (long) p.getCreateTime() >= previousOffset)
                    .map(p -> Tuple2.of(p, (long) p.getCreateTime()))
                    .collect(Collectors.toList());
        }

        @Override
        public List<Partition> fetch(PartitionFetcher.Context<Partition> context) throws Exception {
            return null;
        }
    };
    // Start monitoring from offset 0L with no previously seen partitions.
    partitionMonitor =
            new ContinuousHiveSplitEnumerator.PartitionMonitor<>(
                    0L,
                    seenPartitionsSinceOffset,
                    tablePath,
                    configuration,
                    jobConf,
                    continuousPartitionFetcher,
                    fetcherContext);
}
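
The method reads two fields of the enclosing test class: testPartitionWithOffset, the fixture list the stub fetcher serves from, and partitionMonitor, the object under test. Below is a minimal sketch of how that fixture could be populated, assuming the thrift-generated Partition constructor from the Hive metastore API; the helper name commitPartitionWithGivenCreateTime is an assumption for illustration, not verbatim project code.

private List<Partition> testPartitionWithOffset = new ArrayList<>();
private ContinuousHiveSplitEnumerator.PartitionMonitor<Long> partitionMonitor;

// Assumed helper: registers a partition whose values are "key=value" strings,
// which is the format toHiveTablePartition above expects to parse.
private void commitPartitionWithGivenCreateTime(List<String> partitionValues, int createTime) {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation("/tmp/testTable/" + String.join("/", partitionValues));
    Partition partition =
            new Partition(partitionValues, "testDb", "testTable", createTime, createTime, sd, null);
    testPartitionWithOffset.add(partition);
}

Partitions registered this way are exactly what fetchPartitions filters on: each poll returns those whose create time is at or past the monitor's current offset.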
Also used:
Arrays (java.util.Arrays)
TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)
Properties (java.util.Properties)
PartitionFetcher (org.apache.flink.connector.file.table.PartitionFetcher)
Tuple2 (org.apache.flink.api.java.tuple.Tuple2)
Collection (java.util.Collection)
Configuration (org.apache.flink.configuration.Configuration)
Assert.assertTrue (org.junit.Assert.assertTrue)
Asserts (org.apache.http.util.Asserts)
Test (org.junit.Test)
HashMap (java.util.HashMap)
ObjectPath (org.apache.flink.table.catalog.ObjectPath)
Partition (org.apache.hadoop.hive.metastore.api.Partition)
Collectors (java.util.stream.Collectors)
ArrayList (java.util.ArrayList)
JobConf (org.apache.hadoop.mapred.JobConf)
HiveContinuousPartitionContext (org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext)
List (java.util.List)
ContinuousPartitionFetcher (org.apache.flink.connector.file.table.ContinuousPartitionFetcher)
Map (java.util.Map)
Assert.assertArrayEquals (org.junit.Assert.assertArrayEquals)
Optional (java.util.Optional)
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)
Collections (java.util.Collections)
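
A hedged sketch of driving the monitor itself follows, assuming (as the constructor call above suggests) that PartitionMonitor implements Callable and that its result type, NewSplitsAndState in ContinuousHiveSplitEnumerator, exposes the partitions seen so far; both the result type and its getSeenPartitions accessor should be verified against the Flink version in use.

@Test
public void testMonitorSeesCommittedPartition() throws Exception {
    preparePartitionMonitor();
    // Register a partition created at time 1; the stub fetcher will serve it,
    // since 1 >= the monitor's starting offset of 0.
    commitPartitionWithGivenCreateTime(Arrays.asList("pt=2021-01-01"), 1);
    // Poll once and check that the monitor recorded the new partition.
    ContinuousHiveSplitEnumerator.NewSplitsAndState<Long> result = partitionMonitor.call();
    // getSeenPartitions() is assumed here; it returns the value lists of seen partitions.
    assertTrue(result.getSeenPartitions().contains(Arrays.asList("pt=2021-01-01")));
}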

Aggregations

ArrayList (java.util.ArrayList) 1
Arrays (java.util.Arrays) 1
Collection (java.util.Collection) 1
Collections (java.util.Collections) 1
HashMap (java.util.HashMap) 1
List (java.util.List) 1
Map (java.util.Map) 1
Optional (java.util.Optional) 1
Properties (java.util.Properties) 1
Collectors (java.util.stream.Collectors) 1
TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer) 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 1
Configuration (org.apache.flink.configuration.Configuration) 1
ContinuousPartitionFetcher (org.apache.flink.connector.file.table.ContinuousPartitionFetcher) 1
PartitionFetcher (org.apache.flink.connector.file.table.PartitionFetcher) 1
HiveContinuousPartitionContext (org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext) 1
ObjectPath (org.apache.flink.table.catalog.ObjectPath) 1
Partition (org.apache.hadoop.hive.metastore.api.Partition) 1
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 1
JobConf (org.apache.hadoop.mapred.JobConf) 1