use of org.apache.flink.connector.file.table.PartitionReader in project flink by apache.
the class HiveLookupTableSource method getLookupFunction.
private TableFunction<RowData> getLookupFunction(int[] keys) {
final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
PartitionFetcher.Context<HiveTablePartition> fetcherContext = new HiveTablePartitionFetcherContext(tablePath, hiveShim, new JobConfWrapper(jobConf), catalogTable.getPartitionKeys(), getProducedTableSchema().getFieldDataTypes(), getProducedTableSchema().getFieldNames(), configuration, defaultPartitionName);
final PartitionFetcher<HiveTablePartition> partitionFetcher;
// avoid lambda capture
final ObjectPath tableFullPath = tablePath;
if (catalogTable.getPartitionKeys().isEmpty()) {
// non-partitioned table, the fetcher fetches the partition which represents the given
// table.
partitionFetcher = context -> {
List<HiveTablePartition> partValueList = new ArrayList<>();
partValueList.add(context.getPartition(new ArrayList<>()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
return partValueList;
};
} else if (isStreamingSource()) {
// streaming-read partitioned table, the fetcher fetches the latest partition of the
// given table.
partitionFetcher = context -> {
List<HiveTablePartition> partValueList = new ArrayList<>();
List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
// fetch latest partitions for partitioned table
if (comparablePartitionValues.size() > 0) {
// sort in desc order
comparablePartitionValues.sort((o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
PartitionFetcher.Context.ComparablePartitionValue maxPartition = comparablePartitionValues.get(0);
partValueList.add(context.getPartition((List<String>) maxPartition.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
} else {
throw new IllegalArgumentException(String.format("At least one partition is required when set '%s' to 'latest' in temporal join," + " but actual partition number is '%s' for hive table %s", STREAMING_SOURCE_PARTITION_INCLUDE.key(), comparablePartitionValues.size(), tableFullPath));
}
return partValueList;
};
} else {
// bounded-read partitioned table, the fetcher fetches all partitions of the given
// filesystem table.
partitionFetcher = context -> {
List<HiveTablePartition> partValueList = new ArrayList<>();
List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue : comparablePartitionValues) {
partValueList.add(context.getPartition((List<String>) comparablePartitionValue.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
}
return partValueList;
};
}
PartitionReader<HiveTablePartition, RowData> partitionReader = new HiveInputFormatPartitionReader(flinkConf, jobConf, hiveVersion, tablePath, getProducedTableSchema().getFieldDataTypes(), getProducedTableSchema().getFieldNames(), catalogTable.getPartitionKeys(), projectedFields, flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
return new FileSystemLookupFunction<>(partitionFetcher, fetcherContext, partitionReader, (RowType) getProducedTableSchema().toRowDataType().getLogicalType(), keys, hiveTableReloadInterval);
}
use of org.apache.flink.connector.file.table.PartitionReader in project flink by apache.
the class HiveLookupJoinITCase method testPartitionFetcherAndReader.
@Test
public void testPartitionFetcherAndReader() throws Exception {
// constructs test data using dynamic partition
TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
batchEnv.useCatalog(hiveCatalog.getName());
batchEnv.executeSql("insert overwrite partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')," + "(3,'c',33,2020,'09','31')").await();
FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
lookupFunction.open(null);
PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
List<HiveTablePartition> partitions = fetcher.fetch(context);
// fetch latest partition by partition-name
assertEquals(1, partitions.size());
PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
reader.open(partitions);
List<RowData> res = new ArrayList<>();
ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
TypeSerializer<RowData> serializer = InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
RowData row;
while ((row = reader.read(reuse)) != null) {
res.add(serializer.copy(row));
}
res.sort(Comparator.comparingInt(o -> o.getInt(0)));
assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
Aggregations