Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HivePartitionFetcherTest, method testIgnoreNonExistPartition.
@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetch can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] {"i", "date"};
    DataType[] fieldTypes = new DataType[] {DataTypes.INT(), DataTypes.STRING()};
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
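For context, here is a minimal standalone sketch (not taken from the Flink sources) of the three partition orders the test cycles through. It assumes HiveOptions and STREAMING_SOURCE_PARTITION_ORDER live in org.apache.flink.connectors.hive, as in recent Flink releases; adjust package and option names to your version.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.connectors.hive.HiveOptions;

import static org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER;

// Hypothetical demo class, not part of Flink.
public class PartitionOrderSketch {
    public static void main(String[] args) {
        Configuration flinkConf = new Configuration();
        // Consume partitions ordered by the time parsed from partition values (e.g. date=2021-06-18).
        flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
        // Or by the time the partition was created.
        flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
        // Or lexicographically by partition name.
        flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
        System.out.println(flinkConf.get(STREAMING_SOURCE_PARTITION_ORDER));
    }
}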
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveParserTypeConverter, method convert.
public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
    RelDataType convertedType = null;
    HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
    switch (type.getPrimitiveCategory()) {
        case VOID:
            convertedType = dtFactory.createSqlType(SqlTypeName.NULL);
            break;
        case BOOLEAN:
            convertedType = dtFactory.createSqlType(SqlTypeName.BOOLEAN);
            break;
        case BYTE:
            convertedType = dtFactory.createSqlType(SqlTypeName.TINYINT);
            break;
        case SHORT:
            convertedType = dtFactory.createSqlType(SqlTypeName.SMALLINT);
            break;
        case INT:
            convertedType = dtFactory.createSqlType(SqlTypeName.INTEGER);
            break;
        case LONG:
            convertedType = dtFactory.createSqlType(SqlTypeName.BIGINT);
            break;
        case FLOAT:
            convertedType = dtFactory.createSqlType(SqlTypeName.FLOAT);
            break;
        case DOUBLE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DOUBLE);
            break;
        case STRING:
            convertedType =
                    dtFactory.createTypeWithCharsetAndCollation(
                            dtFactory.createSqlType(SqlTypeName.VARCHAR, Integer.MAX_VALUE),
                            Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME),
                            SqlCollation.IMPLICIT);
            break;
        case DATE:
            convertedType = dtFactory.createSqlType(SqlTypeName.DATE);
            break;
        case TIMESTAMP:
            convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP, 9);
            break;
        case BINARY:
            convertedType = dtFactory.createSqlType(SqlTypeName.BINARY);
            break;
        case DECIMAL:
            DecimalTypeInfo dtInf = (DecimalTypeInfo) type;
            convertedType =
                    dtFactory.createSqlType(SqlTypeName.DECIMAL, dtInf.precision(), dtInf.scale());
            break;
        case VARCHAR:
            convertedType =
                    dtFactory.createTypeWithCharsetAndCollation(
                            dtFactory.createSqlType(
                                    SqlTypeName.VARCHAR, ((BaseCharTypeInfo) type).getLength()),
                            Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME),
                            SqlCollation.IMPLICIT);
            break;
        case CHAR:
            convertedType =
                    dtFactory.createTypeWithCharsetAndCollation(
                            dtFactory.createSqlType(
                                    SqlTypeName.CHAR, ((BaseCharTypeInfo) type).getLength()),
                            Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME),
                            SqlCollation.IMPLICIT);
            break;
        case UNKNOWN:
            convertedType = dtFactory.createSqlType(SqlTypeName.OTHER);
            break;
        default:
            if (hiveShim.isIntervalYearMonthType(type.getPrimitiveCategory())) {
                convertedType =
                        dtFactory.createSqlIntervalType(
                                new SqlIntervalQualifier(
                                        TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            } else if (hiveShim.isIntervalDayTimeType(type.getPrimitiveCategory())) {
                convertedType =
                        dtFactory.createSqlIntervalType(
                                new SqlIntervalQualifier(
                                        TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1, 1)));
            }
    }
    if (null == convertedType) {
        throw new RuntimeException("Unsupported Type : " + type.getTypeName());
    }
    return dtFactory.createTypeWithNullability(convertedType, true);
}
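To see what feeds this converter, here is a small sketch (not from the Flink sources) that builds the Hive PrimitiveTypeInfo inputs and notes, in comments, the Calcite type each case above produces. It assumes Hive's TypeInfoFactory API as in Hive 2/3. The convert() call itself is omitted because it needs a live HiveParser session for getSessionHiveShim() plus a RelDataTypeFactory.

import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Hypothetical demo class, not part of Flink.
public class HiveTypeInfoSketch {
    public static void main(String[] args) {
        PrimitiveTypeInfo i = TypeInfoFactory.intTypeInfo;               // INT       -> SqlTypeName.INTEGER
        PrimitiveTypeInfo s = TypeInfoFactory.stringTypeInfo;            // STRING    -> VARCHAR(Integer.MAX_VALUE), UTF-16
        PrimitiveTypeInfo ts = TypeInfoFactory.timestampTypeInfo;        // TIMESTAMP -> TIMESTAMP(9)
        DecimalTypeInfo dec = TypeInfoFactory.getDecimalTypeInfo(10, 2); // DECIMAL   -> DECIMAL(10, 2)
        PrimitiveTypeInfo vc = TypeInfoFactory.getVarcharTypeInfo(32);   // VARCHAR   -> VARCHAR(32), UTF-16
        System.out.println(dec.getTypeName() + " has precision " + dec.precision() + ", scale " + dec.scale());
    }
}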
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveTableSink, method createBatchSink.
private DataStreamSink<Row> createBatchSink(
        DataStream<RowData> dataStream, DataStructureConverter converter, StorageDescriptor sd,
        HiveWriterFactory recordWriterFactory, OutputFileConfig fileNaming, final int parallelism)
        throws IOException {
    FileSystemOutputFormat.Builder<Row> builder = new FileSystemOutputFormat.Builder<>();
    builder.setPartitionComputer(
            new HiveRowPartitionComputer(
                    hiveShim, JobConfUtils.getDefaultPartitionName(jobConf),
                    tableSchema.getFieldNames(), tableSchema.getFieldDataTypes(),
                    getPartitionKeyArray()));
    builder.setDynamicGrouped(dynamicGrouping);
    builder.setPartitionColumns(getPartitionKeyArray());
    builder.setFileSystemFactory(fsFactory());
    builder.setFormatFactory(new HiveOutputFormatFactory(recordWriterFactory));
    builder.setMetaStoreFactory(msFactory());
    builder.setOverwrite(overwrite);
    builder.setStaticPartitions(staticPartitionSpec);
    builder.setTempPath(new org.apache.flink.core.fs.Path(toStagingDir(sd.getLocation(), jobConf)));
    builder.setOutputFileConfig(fileNaming);
    return dataStream
            .map((MapFunction<RowData, Row>) value -> (Row) converter.toExternal(value))
            .writeUsingOutputFormat(builder.build())
            .setParallelism(parallelism);
}
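As a side note on the fileNaming parameter: a minimal sketch (not from the Flink sources) of building an OutputFileConfig, assuming the builder API in org.apache.flink.streaming.api.functions.sink.filesystem; names may differ across Flink versions.

import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;

// Hypothetical demo class, not part of Flink.
public class FileNamingSketch {
    public static void main(String[] args) {
        // Controls how the files written by the batch sink are named: <prefix>-...-<count><suffix>.
        OutputFileConfig fileNaming =
                OutputFileConfig.builder()
                        .withPartPrefix("part")
                        .withPartSuffix(".data")
                        .build();
        System.out.println(fileNaming.getPartPrefix() + " / " + fileNaming.getPartSuffix());
    }
}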
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class SqlFunctionConverter, method visitCall.
@Override
public RexNode visitCall(RexCall call) {
    SqlOperator operator = call.getOperator();
    List<RexNode> operands = call.getOperands();
    SqlOperator convertedOp = convertOperator(operator);
    final boolean[] update = null;
    if (convertedOp instanceof SqlCastFunction) {
        RelDataType type = call.getType();
        return builder.makeCall(type, convertedOp, visitList(operands, update));
    } else {
        if (convertedOp instanceof FlinkSqlTimestampFunction) {
            // flink's current_timestamp has different type from hive's, convert it to a literal
            Timestamp currentTS =
                    ((HiveParser.HiveParserSessionState) SessionState.get()).getHiveParserCurrentTS();
            HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
            try {
                return HiveParserRexNodeConverter.convertConstant(
                        new ExprNodeConstantDesc(hiveShim.toHiveTimestamp(currentTS)), cluster);
            } catch (SemanticException e) {
                throw new FlinkHiveException(e);
            }
        }
        return builder.makeCall(convertedOp, visitList(operands, update));
    }
}
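The interesting HiveShim call here is toHiveTimestamp. Below is a minimal sketch (not from the Flink sources) of loading a shim and converting a timestamp value, assuming the default shims accept a java.time.LocalDateTime and that HiveShimLoader.getHiveVersion() picks up the Hive version on the classpath.

import java.time.LocalDateTime;

import org.apache.flink.table.catalog.hive.client.HiveShim;
import org.apache.flink.table.catalog.hive.client.HiveShimLoader;

// Hypothetical demo class, not part of Flink.
public class HiveTimestampSketch {
    public static void main(String[] args) {
        // Load the shim matching the Hive version found on the classpath.
        HiveShim hiveShim = HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion());
        // Convert a Flink timestamp value to the Hive-version-specific timestamp class,
        // similar to what visitCall does before folding current_timestamp into a literal.
        Object hiveTs = hiveShim.toHiveTimestamp(LocalDateTime.now());
        System.out.println(hiveTs.getClass().getName() + ": " + hiveTs);
    }
}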
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveInspectors, method getConversion.
/**
 * Get conversion for converting Flink object to Hive object from an ObjectInspector and the
 * corresponding Flink DataType.
 */
public static HiveObjectConversion getConversion(
        ObjectInspector inspector, LogicalType dataType, HiveShim hiveShim) {
    if (inspector instanceof PrimitiveObjectInspector) {
        HiveObjectConversion conversion;
        if (inspector instanceof BooleanObjectInspector
                || inspector instanceof StringObjectInspector
                || inspector instanceof ByteObjectInspector
                || inspector instanceof ShortObjectInspector
                || inspector instanceof IntObjectInspector
                || inspector instanceof LongObjectInspector
                || inspector instanceof FloatObjectInspector
                || inspector instanceof DoubleObjectInspector
                || inspector instanceof BinaryObjectInspector
                || inspector instanceof VoidObjectInspector) {
            conversion = IdentityConversion.INSTANCE;
        } else if (inspector instanceof DateObjectInspector) {
            conversion = hiveShim::toHiveDate;
        } else if (inspector instanceof TimestampObjectInspector) {
            conversion = hiveShim::toHiveTimestamp;
        } else if (inspector instanceof HiveCharObjectInspector) {
            conversion =
                    o -> o == null ? null : new HiveChar((String) o, ((CharType) dataType).getLength());
        } else if (inspector instanceof HiveVarcharObjectInspector) {
            conversion =
                    o -> o == null ? null : new HiveVarchar((String) o, ((VarCharType) dataType).getLength());
        } else if (inspector instanceof HiveDecimalObjectInspector) {
            conversion = o -> o == null ? null : HiveDecimal.create((BigDecimal) o);
        } else {
            throw new FlinkHiveUDFException(
                    "Unsupported primitive object inspector " + inspector.getClass().getName());
        }
        // currently this happens for constant arguments for UDFs
        if (((PrimitiveObjectInspector) inspector).preferWritable()) {
            conversion = new WritableHiveObjectConversion(conversion, hiveShim);
        }
        return conversion;
    }
    if (inspector instanceof ListObjectInspector) {
        HiveObjectConversion eleConvert =
                getConversion(
                        ((ListObjectInspector) inspector).getListElementObjectInspector(),
                        ((ArrayType) dataType).getElementType(),
                        hiveShim);
        return o -> {
            if (o == null) {
                return null;
            }
            Object[] array = (Object[]) o;
            List<Object> result = new ArrayList<>();
            for (Object ele : array) {
                result.add(eleConvert.toHiveObject(ele));
            }
            return result;
        };
    }
    if (inspector instanceof MapObjectInspector) {
        MapObjectInspector mapInspector = (MapObjectInspector) inspector;
        MapType kvType = (MapType) dataType;
        HiveObjectConversion keyConversion =
                getConversion(mapInspector.getMapKeyObjectInspector(), kvType.getKeyType(), hiveShim);
        HiveObjectConversion valueConversion =
                getConversion(mapInspector.getMapValueObjectInspector(), kvType.getValueType(), hiveShim);
        return o -> {
            if (o == null) {
                return null;
            }
            Map<Object, Object> map = (Map) o;
            Map<Object, Object> result = new HashMap<>(map.size());
            for (Map.Entry<Object, Object> entry : map.entrySet()) {
                result.put(
                        keyConversion.toHiveObject(entry.getKey()),
                        valueConversion.toHiveObject(entry.getValue()));
            }
            return result;
        };
    }
    if (inspector instanceof StructObjectInspector) {
        StructObjectInspector structInspector = (StructObjectInspector) inspector;
        List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
        List<RowType.RowField> rowFields = ((RowType) dataType).getFields();
        HiveObjectConversion[] conversions = new HiveObjectConversion[structFields.size()];
        for (int i = 0; i < structFields.size(); i++) {
            conversions[i] =
                    getConversion(
                            structFields.get(i).getFieldObjectInspector(),
                            rowFields.get(i).getType(),
                            hiveShim);
        }
        return o -> {
            if (o == null) {
                return null;
            }
            Row row = (Row) o;
            List<Object> result = new ArrayList<>(row.getArity());
            for (int i = 0; i < row.getArity(); i++) {
                result.add(conversions[i].toHiveObject(row.getField(i)));
            }
            return result;
        };
    }
    throw new FlinkHiveUDFException(
            String.format("Flink doesn't support convert object conversion for %s yet", inspector));
}
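A minimal usage sketch (not from the Flink sources) of calling getConversion for a primitive decimal inspector, assuming HiveInspectors and HiveObjectConversion live in org.apache.flink.table.functions.hive.conversion and Hive's PrimitiveObjectInspectorFactory is on the classpath. The Java decimal inspector does not prefer writables, so the BigDecimal-to-HiveDecimal lambda branch above is exercised.

import java.math.BigDecimal;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.catalog.hive.client.HiveShim;
import org.apache.flink.table.catalog.hive.client.HiveShimLoader;
import org.apache.flink.table.functions.hive.conversion.HiveInspectors;
import org.apache.flink.table.functions.hive.conversion.HiveObjectConversion;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical demo class, not part of Flink.
public class ConversionSketch {
    public static void main(String[] args) {
        HiveShim hiveShim = HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion());
        // DECIMAL(10, 2) on the Flink side, a Java HiveDecimal inspector on the Hive side.
        HiveObjectConversion conversion =
                HiveInspectors.getConversion(
                        PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector,
                        DataTypes.DECIMAL(10, 2).getLogicalType(),
                        hiveShim);
        Object hiveValue = conversion.toHiveObject(new BigDecimal("12.34"));
        System.out.println(hiveValue); // expected to print a HiveDecimal of 12.34
    }
}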