Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.
The class HiveParser, method parse:
@Override
public List<Operation> parse(String statement) {
    CatalogManager catalogManager = getCatalogManager();
    Catalog currentCatalog =
            catalogManager.getCatalog(catalogManager.getCurrentCatalog()).orElse(null);
    if (!(currentCatalog instanceof HiveCatalog)) {
        LOG.warn("Current catalog is not HiveCatalog. Falling back to Flink's planner.");
        return super.parse(statement);
    }
    HiveConf hiveConf = new HiveConf(((HiveCatalog) currentCatalog).getHiveConf());
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    hiveConf.set("hive.allow.udf.load.on.demand", "false");
    hiveConf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
    HiveShim hiveShim =
            HiveShimLoader.loadHiveShim(((HiveCatalog) currentCatalog).getHiveVersion());
    try {
        // creates SessionState
        startSessionState(hiveConf, catalogManager);
        // We override Hive's grouping function. Refer to the implementation for more details.
        hiveShim.registerTemporaryFunction("grouping", HiveGenericUDFGrouping.class);
        return processCmd(statement, hiveConf, hiveShim, (HiveCatalog) currentCatalog);
    } finally {
        clearSessionState();
    }
}
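HiveParser is the parser used when the SQL dialect is set to Hive; as the check above shows, it additionally falls back to Flink's own parser when the current catalog is not a HiveCatalog. A minimal sketch of a setup that exercises this path, assuming Flink's public Table API; the catalog name, default database, and hive-conf directory are placeholders:

// a minimal sketch, assuming the public Table API; names and paths are placeholders
EnvironmentSettings settings = EnvironmentSettings.inBatchMode();
TableEnvironment tableEnv = TableEnvironment.create(settings);
// register a HiveCatalog and make it the current catalog
HiveCatalog hiveCatalog = new HiveCatalog("myhive", "default", "/path/to/hive-conf");
tableEnv.registerCatalog("myhive", hiveCatalog);
tableEnv.useCatalog("myhive");
// with the Hive dialect enabled, statements are routed through HiveParser#parse
tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
tableEnv.executeSql("show tables");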
Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.
The class HivePartitionFetcherTest, method testIgnoreNonExistPartition:
@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetch can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] {"i", "date"};
    DataType[] fieldTypes = new DataType[] {DataTypes.INT(), DataTypes.STRING()};
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
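For contrast, a partition that the fetcher should pick up would also be registered in the metastore through the catalog, not just materialized as a directory. A small sketch under the same test setup; the empty property map and null comment are assumptions:

// hypothetical continuation: register the partition in HMS via the catalog,
// after which the fetcher would report it instead of ignoring the bare path
CatalogPartitionSpec spec =
        new CatalogPartitionSpec(Collections.singletonMap("date", "2021-06-18"));
hiveCatalog.createPartition(
        tablePath, spec, new CatalogPartitionImpl(new HashMap<>(), null), false);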
Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.
The class HiveLookupJoinITCase, method testPartitionFetcherAndReader:
@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql(
                    "insert overwrite partition_table values "
                            + "(1,'a',08,2019,'08','01'),"
                            + "(1,'a',10,2020,'08','31'),"
                            + "(2,'a',21,2020,'08','31'),"
                            + "(2,'b',22,2020,'08','31'),"
                            + "(3,'c',33,2020,'09','31')")
            .await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction =
            getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier =
            ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable =
            (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer =
            InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
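The DDL for partition_table lives in the test's setup code and is not shown here. Judging from the rows inserted above, it is a three-column table with year/month/day partition columns; a hedged sketch of a compatible DDL, with assumed column and partition names:

// hedged sketch of a DDL compatible with the insert above; the real column and
// partition names come from the test's setup code and may differ
batchEnv.executeSql(
        "create table partition_table (x int, y string, z int)"
                + " partitioned by (pt_year int, pt_month string, pt_day string)");

Because the insert above does not name the partition values explicitly, it relies on dynamic partitioning, which HiveParser enables by forcing the partitioning mode to nonstrict (see the parse method at the top of this page).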
Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.
The class HiveTableSinkITCase, method testBatchAppend:
@Test
public void testBatchAppend() throws Exception {
    TableEnvironment tEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tEnv.useCatalog(hiveCatalog.getName());
    tEnv.executeSql("create database db1");
    tEnv.useDatabase("db1");
    try {
        tEnv.executeSql("create table append_table (i int, j int)");
        tEnv.executeSql("insert into append_table select 1, 1").await();
        tEnv.executeSql("insert into append_table select 2, 2").await();
        List<Row> rows =
                CollectionUtil.iteratorToList(
                        tEnv.executeSql("select * from append_table").collect());
        rows.sort(Comparator.comparingInt(o -> (int) o.getField(0)));
        Assert.assertEquals(Arrays.asList(Row.of(1, 1), Row.of(2, 2)), rows);
    } finally {
        tEnv.executeSql("drop database db1 cascade");
    }
}
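The same append behaviour carries over to partitioned tables under the Hive dialect. A hypothetical variant within the same session; the table name and static partition value are assumptions:

// hypothetical partitioned variant of the append test above
tEnv.executeSql("create table append_part (i int, j int) partitioned by (p string)");
tEnv.executeSql("insert into append_part partition (p='a') select 1, 1").await();
tEnv.executeSql("insert into append_part partition (p='a') select 2, 2").await();
// both rows accumulate in partition p='a', just as they do for append_table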
Use of org.apache.flink.table.catalog.hive.HiveCatalog in project flink by apache.
The class HiveInputFormatPartitionReaderITCase, method testReadMultipleSplits:
@Test
public void testReadMultipleSplits() throws Exception {
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    if (!HiveShimLoader.getHiveVersion().startsWith("2.0")) {
        testReadFormat(tableEnv, hiveCatalog, "orc");
    }
    testReadFormat(tableEnv, hiveCatalog, "parquet");
}
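The testReadFormat helper is not shown on this page; it presumably creates and reads back a table stored in the given format through HiveInputFormatPartitionReader. A hedged sketch of a comparable per-format round trip using plain SQL instead of the reader; the table name and data are assumptions:

// hedged sketch: write and read back a table stored as the given format;
// the real helper drives HiveInputFormatPartitionReader directly and may differ
private static void readFormatSketch(TableEnvironment tableEnv, String format) throws Exception {
    String table = "test_" + format; // hypothetical table name
    tableEnv.executeSql(
            String.format("create table %s (i int, s string) stored as %s", table, format));
    tableEnv.executeSql(String.format("insert into %s values (1,'a'),(2,'b')", table)).await();
    List<Row> rows =
            CollectionUtil.iteratorToList(
                    tableEnv.executeSql("select * from " + table).collect());
    Assert.assertEquals(2, rows.size());
}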