Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
The class HiveLookupTableSource, method getLookupFunction.
private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // avoid lambda capture
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the
        // given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(
                    context.getPartition(new ArrayList<>())
                            .orElseThrow(() -> new IllegalArgumentException(String.format(
                                    "Fetch partition fail for hive table %s.", tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of
        // the given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            // fetch latest partitions for partitioned table
            if (comparablePartitionValues.size() > 0) {
                // sort in descending order
                comparablePartitionValues.sort(
                        (o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition =
                        comparablePartitionValues.get(0);
                partValueList.add(
                        context.getPartition((List<String>) maxPartition.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.", tableFullPath))));
            } else {
                throw new IllegalArgumentException(String.format(
                        "At least one partition is required when set '%s' to 'latest' in temporal join,"
                                + " but actual partition number is '%s' for hive table %s",
                        STREAMING_SOURCE_PARTITION_INCLUDE.key(),
                        comparablePartitionValues.size(),
                        tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue :
                    comparablePartitionValues) {
                partValueList.add(
                        context.getPartition(
                                        (List<String>) comparablePartitionValue.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.", tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
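A thread running through all the snippets on this page: ObjectPath is just an immutable (database, object) name pair, which is what makes tablePath safe to copy into the serializable fetcher lambdas above. A minimal sketch of the accessors involved, assuming only a flink-table dependency on the classpath (the class name ObjectPathSketch is illustrative, not from the Flink sources):

import org.apache.flink.table.catalog.ObjectPath;

// ObjectPath is a plain, immutable (database, object) identifier.
public class ObjectPathSketch {
    public static void main(String[] args) {
        ObjectPath tablePath = new ObjectPath("mydb", "mytable");
        System.out.println(tablePath.getDatabaseName()); // mydb
        System.out.println(tablePath.getObjectName());   // mytable
        System.out.println(tablePath.getFullName());     // mydb.mytable
    }
}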
Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
The class PartitionMonitorTest, method preparePartitionMonitor.
private void preparePartitionMonitor() {
    List<List<String>> seenPartitionsSinceOffset = new ArrayList<>();
    JobConf jobConf = new JobConf();
    Configuration configuration = new Configuration();
    ObjectPath tablePath = new ObjectPath("testDb", "testTable");
    configuration.setString("streaming-source.consume-order", "create-time");
    HiveContinuousPartitionContext<Partition, Long> fetcherContext =
            new HiveContinuousPartitionContext<Partition, Long>() {

                @Override
                public HiveTablePartition toHiveTablePartition(Partition partition) {
                    StorageDescriptor sd = partition.getSd();
                    Map<String, String> partitionColValues = new HashMap<>();
                    for (String partCol : partition.getValues()) {
                        String[] arr = partCol.split("=");
                        Asserts.check(arr.length == 2, "partition string should be key=value format");
                        partitionColValues.put(arr[0], arr[1]);
                    }
                    return new HiveTablePartition(sd, partitionColValues, new Properties());
                }

                @Override
                public ObjectPath getTablePath() {
                    return null;
                }

                @Override
                public TypeSerializer<Long> getTypeSerializer() {
                    return null;
                }

                @Override
                public Long getConsumeStartOffset() {
                    return null;
                }

                @Override
                public void open() throws Exception {}

                @Override
                public Optional<Partition> getPartition(List<String> partValues) throws Exception {
                    return Optional.empty();
                }

                @Override
                public List<ComparablePartitionValue> getComparablePartitionValueList() throws Exception {
                    return null;
                }

                @Override
                public void close() throws Exception {}
            };
    ContinuousPartitionFetcher<Partition, Long> continuousPartitionFetcher =
            new ContinuousPartitionFetcher<Partition, Long>() {

                private static final long serialVersionUID = 1L;

                @Override
                public List<Tuple2<Partition, Long>> fetchPartitions(
                        Context<Partition, Long> context, Long previousOffset) throws Exception {
                    return testPartitionWithOffset.stream()
                            .filter(p -> (long) p.getCreateTime() >= previousOffset)
                            .map(p -> Tuple2.of(p, (long) p.getCreateTime()))
                            .collect(Collectors.toList());
                }

                @Override
                public List<Partition> fetch(PartitionFetcher.Context<Partition> context) throws Exception {
                    return null;
                }
            };
    partitionMonitor =
            new ContinuousHiveSplitEnumerator.PartitionMonitor<>(
                    0L,
                    seenPartitionsSinceOffset,
                    tablePath,
                    configuration,
                    jobConf,
                    continuousPartitionFetcher,
                    fetcherContext);
}
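The anonymous toHiveTablePartition above assumes every partition value string arrives as key=value. A standalone sketch of just that parsing step, using only JDK types so it runs without Hive on the classpath (PartitionSpecParser and its method names are illustrative):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative re-implementation of the key=value parsing in toHiveTablePartition.
public class PartitionSpecParser {

    static Map<String, String> parse(List<String> partitionStrings) {
        Map<String, String> partitionColValues = new HashMap<>();
        for (String partCol : partitionStrings) {
            String[] arr = partCol.split("=");
            if (arr.length != 2) {
                throw new IllegalArgumentException(
                        "partition string should be key=value format: " + partCol);
            }
            partitionColValues.put(arr[0], arr[1]);
        }
        return partitionColValues;
    }

    public static void main(String[] args) {
        // prints {date=2021-06-18, hour=12} (iteration order may vary for a HashMap)
        System.out.println(parse(Arrays.asList("date=2021-06-18", "hour=12")));
    }
}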
Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
The class HivePartitionFetcherTest, method testIgnoreNonExistPartition.
@Test
public void testIgnoreNonExistPartition() throws Exception {
    // It's possible that a partition path exists but the partition has not been added to
    // HMS, e.g. the partition is still being loaded, or the path is simply misplaced.
    // Make sure the fetcher can ignore such paths.
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create the test table
    String[] fieldNames = new String[] {"i", "date"};
    DataType[] fieldTypes = new DataType[] {DataTypes.INT(), DataTypes.STRING()};
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a well-formed partition path on the filesystem, without registering it in HMS
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
                    flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
                    flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames,
                    flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
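All three assertions verify that the date=2021-06-18 directory, which exists on disk but was never added to HMS, contributes no partition values. A tiny sketch of that filtering idea, detached from Hive (keepKnown and metastorePartitions are hypothetical stand-ins for the fetcher's HMS lookup):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

// Sketch of the "ignore non-existent partitions" behavior the test verifies:
// only directories that are also registered in the metastore survive the fetch.
public class IgnoreUnknownPartitions {

    // metastorePartitions is a hypothetical stand-in for an HMS lookup.
    static List<String> keepKnown(List<String> partitionDirs, Set<String> metastorePartitions) {
        List<String> known = new ArrayList<>();
        for (String dir : partitionDirs) {
            if (metastorePartitions.contains(dir)) {
                known.add(dir);
            }
        }
        return known;
    }

    public static void main(String[] args) {
        // "date=2021-06-18" exists on disk but not in HMS, so the result is empty,
        // mirroring the three assertEquals(0, ...) checks in the test.
        System.out.println(keepKnown(
                Collections.singletonList("date=2021-06-18"), Collections.emptySet()));
    }
}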
Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
The class HiveCatalogMetadataTestBase, method testCreateFunctionCaseInsensitive.
// ------ functions ------
@Test
public void testCreateFunctionCaseInsensitive() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    String functionName = "myUdf";
    ObjectPath functionPath = new ObjectPath(db1, functionName);
    catalog.createFunction(functionPath, createFunction(), false);
    // make sure we can get the function
    catalog.getFunction(functionPath);
    catalog.dropFunction(functionPath, false);
}
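Worth noting: ObjectPath itself stores and compares names verbatim, so any case-insensitive resolution of myUdf has to come from the catalog implementation (Hive commonly normalizes function names to lower case), not from ObjectPath. A small sketch to make that distinction concrete (ObjectPathCaseSketch is an illustrative name):

import org.apache.flink.table.catalog.ObjectPath;

// ObjectPath compares its database and object names as plain strings,
// so differently-cased paths are distinct objects at this level.
public class ObjectPathCaseSketch {
    public static void main(String[] args) {
        ObjectPath mixed = new ObjectPath("db1", "myUdf");
        ObjectPath lower = new ObjectPath("db1", "myudf");
        System.out.println(mixed.equals(lower)); // false
        System.out.println(mixed.getFullName()); // db1.myUdf
    }
}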
Use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.
The class HiveCatalogITCase, method testTableWithPrimaryKey.
@Test
public void testTableWithPrimaryKey() {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
    tableEnv.getConfig().getConfiguration().setInteger(TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    tableEnv.registerCatalog("catalog1", hiveCatalog);
    tableEnv.useCatalog("catalog1");
    final String createTable =
            "CREATE TABLE pk_src (\n"
                    + " uuid varchar(40) not null,\n"
                    + " price DECIMAL(10, 2),\n"
                    + " currency STRING,\n"
                    + " ts6 TIMESTAMP(6),\n"
                    + " ts AS CAST(ts6 AS TIMESTAMP(3)),\n"
                    + " WATERMARK FOR ts AS ts,\n"
                    + " constraint ct1 PRIMARY KEY(uuid) NOT ENFORCED)\n"
                    + " WITH (\n"
                    + " 'connector.type' = 'filesystem',"
                    + " 'connector.path' = 'file://fakePath',"
                    + " 'format.type' = 'csv')";
    tableEnv.executeSql(createTable);
    TableSchema tableSchema =
            tableEnv.getCatalog(tableEnv.getCurrentCatalog())
                    .map(catalog -> {
                        try {
                            final ObjectPath tablePath =
                                    ObjectPath.fromString(catalog.getDefaultDatabase() + '.' + "pk_src");
                            return catalog.getTable(tablePath).getSchema();
                        } catch (TableNotExistException e) {
                            return null;
                        }
                    })
                    .orElse(null);
    assertThat(tableSchema).isNotNull();
    assertThat(tableSchema.getPrimaryKey())
            .hasValue(UniqueConstraint.primaryKey("ct1", Collections.singletonList("uuid")));
    tableEnv.executeSql("DROP TABLE pk_src");
}
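The lookup above builds the path with ObjectPath.fromString, which expects exactly one dot separating the database and object names. A minimal sketch of that helper, again assuming only a flink-table dependency (FromStringSketch is an illustrative name):

import org.apache.flink.table.catalog.ObjectPath;

// fromString parses a "database.object" string; input without exactly
// one dot-separated pair is rejected with an IllegalArgumentException.
public class FromStringSketch {
    public static void main(String[] args) {
        ObjectPath path = ObjectPath.fromString("default.pk_src");
        System.out.println(path.getDatabaseName()); // default
        System.out.println(path.getObjectName());   // pk_src
    }
}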