Use of org.apache.flink.table.types.DataType in project flink by apache.
From the class CsvRowDataSerDeSchemaTest, method testSerializationWithTypesMismatch.
@Test
public void testSerializationWithTypesMismatch() {
    DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()), FIELD("f2", INT()));
    RowType rowType = (RowType) dataType.getLogicalType();
    CsvRowDataSerializationSchema.Builder serSchemaBuilder =
            new CsvRowDataSerializationSchema.Builder(rowType);
    RowData rowData = rowData("Test", 1, "Test");
    String errorMessage = "Fail to serialize at field: f2.";
    try {
        serialize(serSchemaBuilder, rowData);
        fail("expecting exception message:" + errorMessage);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage));
    }
}
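The test relies on two private helpers from CsvRowDataSerDeSchemaTest that are not shown here, rowData(...) and serialize(...). A minimal sketch of what such helpers might look like (illustrative only; the real serialize helper also round-trips the schema through Java serialization before use):

    private static RowData rowData(String f0, int f1, String f2) {
        // GenericRowData holds Flink's internal representations, hence StringData for strings
        return GenericRowData.of(StringData.fromString(f0), f1, StringData.fromString(f2));
    }

    private static byte[] serialize(CsvRowDataSerializationSchema.Builder builder, RowData row) {
        // build() produces the serialization schema; serialize() encodes the row as CSV bytes
        return builder.build().serialize(row);
    }

The mismatch comes from rowData("Test", 1, "Test"): field f2 carries a string while its declared type is INT(), so serialization fails at f2 with the expected message.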
Use of org.apache.flink.table.types.DataType in project flink by apache.
From the class ArrowUtils, method collectAsPandasDataFrame.
/**
 * Convert Flink table to Pandas DataFrame.
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
    checkArrowUsable();
    BufferAllocator allocator =
            getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    arrowStreamWriter.start();
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter<RowData> arrowWriter = createRowDataArrowWriter(root, rowType);
    Iterator<RowData> convertedResults = new Iterator<RowData>() {

        @Override
        public boolean hasNext() {
            return appendOnlyResults.hasNext();
        }

        @Override
        public RowData next() {
            DataFormatConverters.DataFormatConverter converter =
                    DataFormatConverters.getConverterForDataType(defaultRowDataType);
            return (RowData) converter.toInternal(appendOnlyResults.next());
        }
    };
    return new CustomIterator<byte[]>() {

        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                int i = 0;
                while (convertedResults.hasNext() && i < maxArrowBatchSize) {
                    i++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                arrowWriter.reset();
                baos.reset();
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
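A hedged usage sketch of the returned iterator; the variable names and the batch size of 10000 are illustrative and not part of the method above:

    // Illustrative consumption of the iterator; `table` is assumed to exist.
    CustomIterator<byte[]> batches = ArrowUtils.collectAsPandasDataFrame(table, 10000);
    while (batches.hasNext()) {
        byte[] serializedBatch = batches.next();
        // each call drains up to maxArrowBatchSize rows and returns them as Arrow stream bytes
    }

Note that the allocator and the VectorSchemaRoot are only released once hasNext() turns false, so the iterator should be drained completely.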
Use of org.apache.flink.table.types.DataType in project flink by apache.
From the class ElasticsearchDynamicSinkFactoryBase, method getPrimaryKeyLogicalTypesWithIndex.
List<LogicalTypeWithIndex> getPrimaryKeyLogicalTypesWithIndex(Context context) {
    DataType physicalRowDataType = context.getPhysicalRowDataType();
    int[] primaryKeyIndexes = context.getPrimaryKeyIndexes();
    if (primaryKeyIndexes.length != 0) {
        DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType);
        ElasticsearchValidationUtils.validatePrimaryKey(pkDataType);
    }
    ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema();
    return Arrays.stream(primaryKeyIndexes)
            .mapToObj(index -> {
                Optional<Column> column = resolvedSchema.getColumn(index);
                if (!column.isPresent()) {
                    throw new IllegalStateException(
                            String.format("No primary key column found with index '%s'.", index));
                }
                LogicalType logicalType = column.get().getDataType().getLogicalType();
                return new LogicalTypeWithIndex(index, logicalType);
            })
            .collect(Collectors.toList());
}
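The key step is Projection.of(indexes).project(dataType), which narrows the physical row type down to the primary-key fields before validation. A minimal, self-contained sketch of that step in isolation; the column layout and index values are assumed for illustration:

    // Assumed physical schema; only the projection call itself mirrors the method above.
    DataType physicalRowDataType = DataTypes.ROW(
            DataTypes.FIELD("id", DataTypes.STRING().notNull()),
            DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)),
            DataTypes.FIELD("payload", DataTypes.STRING()));
    int[] primaryKeyIndexes = {0};
    // keeps only field 0, yielding ROW<`id` STRING NOT NULL>
    DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType);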
Use of org.apache.flink.table.types.DataType in project flink by apache.
From the class HivePartitionFetcherTest, method testIgnoreNonExistPartition.
@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetch can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
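For contrast, a hedged sketch of what additionally registering the partition in HMS might look like; the expected count of 1 is an assumption of this sketch and is not asserted by the original test:

    // Hypothetical follow-up: register the partition in HMS so the fetcher can see it.
    CatalogPartitionSpec partitionSpec =
            new CatalogPartitionSpec(Collections.singletonMap("date", "2021-06-18"));
    hiveCatalog.createPartition(
            tablePath, partitionSpec, new CatalogPartitionImpl(new HashMap<>(), null), false);
    fetcherContext = new MyHivePartitionFetcherContext(
            tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    // assumed outcome: the partition known to HMS is now returned
    assertEquals(1, fetcherContext.getComparablePartitionValueList().size());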
Use of org.apache.flink.table.types.DataType in project flink by apache.
From the class HiveCatalogDataTypeTest, method testNonSupportedBinaryDataTypes.
@Test
public void testNonSupportedBinaryDataTypes() throws Exception {
    DataType[] types = new DataType[] { DataTypes.BINARY(BinaryType.MAX_LENGTH) };
    CatalogTable table = createCatalogTable(types);
    catalog.createDatabase(db1, createDb(), false);
    exception.expect(UnsupportedOperationException.class);
    catalog.createTable(path1, table, false);
}
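The createCatalogTable(types), createDb(), db1, path1, and exception members come from the surrounding HiveCatalogDataTypeTest and its base class and are not shown. A minimal sketch of what a helper like createCatalogTable might look like; the column-naming scheme and empty options map are assumptions, not the real implementation:

    private CatalogTable createCatalogTable(DataType[] types) {
        // generate one column per data type; the naming scheme here is illustrative
        String[] colNames = new String[types.length];
        for (int i = 0; i < types.length; i++) {
            colNames[i] = "col_" + i;
        }
        TableSchema schema = TableSchema.builder().fields(colNames, types).build();
        return new CatalogTableImpl(schema, new HashMap<>(), null);
    }

As the test name suggests, creating the table is expected to fail with UnsupportedOperationException because Flink's fixed-length BINARY type has no Hive counterpart.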