use of org.apache.flink.table.data.RowData in project flink by apache.
the class CsvRowDataSerDeSchemaTest method testSerializationWithTypesMismatch.
@Test
public void testSerializationWithTypesMismatch() {
    DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()), FIELD("f2", INT()));
    RowType rowType = (RowType) dataType.getLogicalType();
    CsvRowDataSerializationSchema.Builder serSchemaBuilder =
            new CsvRowDataSerializationSchema.Builder(rowType);
    RowData rowData = rowData("Test", 1, "Test");
    String errorMessage = "Fail to serialize at field: f2.";
    try {
        serialize(serSchemaBuilder, rowData);
        fail("expecting exception message:" + errorMessage);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage));
    }
}
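The rowData(...) and serialize(...) calls refer to helpers defined elsewhere in the test class. A minimal sketch of what they might look like, assuming GenericRowData/StringData for the internal row and the builder's build() method to obtain the schema (names and details here are assumptions, not the test's actual helpers):

// Hypothetical helper: builds an internal row; strings are wrapped as StringData.
private static RowData rowData(String f0, int f1, String f2) {
    return GenericRowData.of(StringData.fromString(f0), f1, StringData.fromString(f2));
}

// Hypothetical helper: builds the schema and serializes one row to CSV bytes.
// The real test helper may additionally call open(...) on the schema first.
private static byte[] serialize(
        CsvRowDataSerializationSchema.Builder serSchemaBuilder, RowData row) throws Exception {
    SerializationSchema<RowData> schema = serSchemaBuilder.build();
    return schema.serialize(row);
}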
use of org.apache.flink.table.data.RowData in project flink by apache.
the class CsvRowDataSerDeSchemaTest method testSerDeConsistency.
private void testSerDeConsistency(
        RowData originalRow,
        CsvRowDataSerializationSchema.Builder serSchemaBuilder,
        CsvRowDataDeserializationSchema.Builder deserSchemaBuilder)
        throws Exception {
    RowData deserializedRow =
            deserialize(deserSchemaBuilder, new String(serialize(serSchemaBuilder, originalRow)));
    assertEquals(deserializedRow, originalRow);
}
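deserialize(...) is likewise a test helper not shown here. A hedged sketch of it, plus one way the round-trip check might be invoked; the InternalTypeInfo.of(...) argument to the deserializer builder is an assumption about how the result type information is obtained:

// Hypothetical helper: builds the schema and parses one CSV line back into RowData.
private static RowData deserialize(
        CsvRowDataDeserializationSchema.Builder deserSchemaBuilder, String csv) throws Exception {
    DeserializationSchema<RowData> schema = deserSchemaBuilder.build();
    return schema.deserialize(csv.getBytes());
}

// Illustrative invocation with a single STRING field:
RowType rowType = (RowType) ROW(FIELD("f0", STRING())).getLogicalType();
testSerDeConsistency(
        GenericRowData.of(StringData.fromString("hello")),
        new CsvRowDataSerializationSchema.Builder(rowType),
        new CsvRowDataDeserializationSchema.Builder(rowType, InternalTypeInfo.of(rowType)));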
use of org.apache.flink.table.data.RowData in project flink by apache.
the class ArrowUtils method collectAsPandasDataFrame.
/**
 * Convert Flink table to Pandas DataFrame.
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize)
        throws Exception {
    checkArrowUsable();
    BufferAllocator allocator =
            getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    arrowStreamWriter.start();
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter arrowWriter = createRowDataArrowWriter(root, rowType);
    Iterator convertedResults =
            new Iterator<RowData>() {
                @Override
                public boolean hasNext() {
                    return appendOnlyResults.hasNext();
                }

                @Override
                public RowData next() {
                    DataFormatConverters.DataFormatConverter converter =
                            DataFormatConverters.getConverterForDataType(defaultRowDataType);
                    return (RowData) converter.toInternal(appendOnlyResults.next());
                }
            };
    return new CustomIterator<byte[]>() {
        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                int i = 0;
                while (convertedResults.hasNext() && i < maxArrowBatchSize) {
                    i++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                arrowWriter.reset();
                baos.reset();
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
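Each call to next() above writes up to maxArrowBatchSize rows into one Arrow batch and returns the bytes accumulated since the last baos.reset(). A hedged caller sketch that drains the iterator and concatenates the chunks into a single Arrow stream; variable names are illustrative, and the fragment assumes an enclosing method declared with throws Exception:

// Drain the iterator; concatenating the chunks yields one Arrow stream,
// since the stream header is written once by arrowStreamWriter.start().
ByteArrayOutputStream allBatches = new ByteArrayOutputStream();
CustomIterator<byte[]> batches = ArrowUtils.collectAsPandasDataFrame(table, 10000);
while (batches.hasNext()) {
    byte[] chunk = batches.next();
    allBatches.write(chunk, 0, chunk.length);
}
byte[] arrowStreamBytes = allBatches.toByteArray();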
use of org.apache.flink.table.data.RowData in project flink by apache.
the class ElasticsearchDynamicSink method getSinkRuntimeProvider.
@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
    SerializationSchema<RowData> format =
            this.format.createRuntimeEncoder(context, physicalRowDataType);
    final RowElasticsearchEmitter rowElasticsearchEmitter =
            new RowElasticsearchEmitter(
                    createIndexGenerator(), format, XContentType.JSON, documentType, createKeyExtractor());
    ElasticsearchSinkBuilderBase<RowData, ? extends ElasticsearchSinkBuilderBase> builder =
            builderSupplier.get();
    builder.setEmitter(rowElasticsearchEmitter);
    builder.setHosts(config.getHosts().toArray(new HttpHost[0]));
    builder.setDeliveryGuarantee(config.getDeliveryGuarantee());
    builder.setBulkFlushMaxActions(config.getBulkFlushMaxActions());
    builder.setBulkFlushMaxSizeMb(config.getBulkFlushMaxByteSize().getMebiBytes());
    builder.setBulkFlushInterval(config.getBulkFlushInterval());
    if (config.getBulkFlushBackoffType().isPresent()) {
        FlushBackoffType backoffType = config.getBulkFlushBackoffType().get();
        int backoffMaxRetries = config.getBulkFlushBackoffRetries().get();
        long backoffDelayMs = config.getBulkFlushBackoffDelay().get();
        builder.setBulkFlushBackoffStrategy(backoffType, backoffMaxRetries, backoffDelayMs);
    }
    if (config.getUsername().isPresent()
            && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) {
        builder.setConnectionUsername(config.getUsername().get());
    }
    if (config.getPassword().isPresent()
            && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get())) {
        builder.setConnectionPassword(config.getPassword().get());
    }
    if (config.getPathPrefix().isPresent()
            && !StringUtils.isNullOrWhitespaceOnly(config.getPathPrefix().get())) {
        builder.setConnectionPathPrefix(config.getPathPrefix().get());
    }
    if (config.getConnectionRequestTimeout().isPresent()) {
        builder.setConnectionRequestTimeout(
                (int) config.getConnectionRequestTimeout().get().getSeconds());
    }
    if (config.getConnectionTimeout().isPresent()) {
        builder.setConnectionTimeout((int) config.getConnectionTimeout().get().getSeconds());
    }
    if (config.getSocketTimeout().isPresent()) {
        builder.setSocketTimeout((int) config.getSocketTimeout().get().getSeconds());
    }
    return SinkV2Provider.of(builder.build(), config.getParallelism().orElse(null));
}
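The timeout wiring above repeats the isPresent()/get() pattern; a purely stylistic sketch of the same wiring with Optional#ifPresent, not the connector's actual code, assuming the same config accessors and builder as above:

// Equivalent conditional configuration using Optional#ifPresent.
config.getConnectionRequestTimeout()
        .ifPresent(t -> builder.setConnectionRequestTimeout((int) t.getSeconds()));
config.getConnectionTimeout()
        .ifPresent(t -> builder.setConnectionTimeout((int) t.getSeconds()));
config.getSocketTimeout()
        .ifPresent(t -> builder.setSocketTimeout((int) t.getSeconds()));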
use of org.apache.flink.table.data.RowData in project flink by apache.
the class HiveLookupJoinITCase method testPartitionFetcherAndReader.
@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql(
                    "insert overwrite partition_table values "
                            + "(1,'a',08,2019,'08','01'),"
                            + "(1,'a',10,2020,'08','31'),"
                            + "(2,'a',21,2020,'08','31'),"
                            + "(2,'b',22,2020,'08','31'),"
                            + "(3,'c',33,2020,'09','31')")
            .await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction =
            getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier =
            ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable =
            (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer =
            InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
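The serializer.copy(row) in the read loop matters because the reader may repopulate the same reuse instance on every read(reuse) call; storing the returned reference directly could leave the list aliasing one, last-written row. An illustrative sketch of the pitfall for contrast, using the same reader and reuse object as above:

// Buggy variant: every element may alias the same reused row object.
List<RowData> aliased = new ArrayList<>();
RowData r;
while ((r = reader.read(reuse)) != null) {
    aliased.add(r); // no copy: a later read can overwrite what was "added"
}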