use of org.apache.flink.streaming.api.datastream.DataStreamSink in project flink by apache.
the class HiveTableSink method createBatchSink.
/**
 * Assembles the batch sink: configures a {@link FileSystemOutputFormat} builder from this sink's
 * settings, converts every incoming {@code RowData} to a {@code Row} via the given converter,
 * and writes the converted stream with the built output format.
 *
 * @param dataStream input stream of internal rows
 * @param converter converter used to turn each {@code RowData} into an external {@code Row}
 * @param sd storage descriptor; its location is passed to {@code toStagingDir} for the temp path
 * @param recordWriterFactory factory wrapped into the {@code HiveOutputFormatFactory}
 * @param fileNaming output file configuration handed to the builder
 * @param parallelism parallelism set on the returned sink
 * @return the sink produced by {@code writeUsingOutputFormat}, with the parallelism applied
 * @throws IOException declared by the original signature
 */
private DataStreamSink<Row> createBatchSink(DataStream<RowData> dataStream, DataStructureConverter converter, StorageDescriptor sd, HiveWriterFactory recordWriterFactory, OutputFileConfig fileNaming, final int parallelism) throws IOException {
    FileSystemOutputFormat.Builder<Row> formatBuilder = new FileSystemOutputFormat.Builder<>();

    // Partition handling.
    HiveRowPartitionComputer partitionComputer =
            new HiveRowPartitionComputer(
                    hiveShim,
                    JobConfUtils.getDefaultPartitionName(jobConf),
                    tableSchema.getFieldNames(),
                    tableSchema.getFieldDataTypes(),
                    getPartitionKeyArray());
    formatBuilder.setPartitionComputer(partitionComputer);
    formatBuilder.setDynamicGrouped(dynamicGrouping);
    formatBuilder.setPartitionColumns(getPartitionKeyArray());

    // File system, record format and metastore access.
    formatBuilder.setFileSystemFactory(fsFactory());
    formatBuilder.setFormatFactory(new HiveOutputFormatFactory(recordWriterFactory));
    formatBuilder.setMetaStoreFactory(msFactory());

    // Write mode, static partitions, temp path and file naming.
    formatBuilder.setOverwrite(overwrite);
    formatBuilder.setStaticPartitions(staticPartitionSpec);
    String stagingDir = toStagingDir(sd.getLocation(), jobConf);
    formatBuilder.setTempPath(new org.apache.flink.core.fs.Path(stagingDir));
    formatBuilder.setOutputFileConfig(fileNaming);

    // Convert to external rows first, then build the format and attach it as the sink.
    DataStream<Row> externalRows =
            dataStream.map((MapFunction<RowData, Row>) rowData -> (Row) converter.toExternal(rowData));
    DataStreamSink<Row> batchSink = externalRows.writeUsingOutputFormat(formatBuilder.build());
    return batchSink.setParallelism(parallelism);
}
use of org.apache.flink.streaming.api.datastream.DataStreamSink in project flink by apache.
the class StreamGraphCoLocationConstraintTest method testSettingCoLocationConstraint.
/**
 * Builds a four-stage pipeline (source, two maps, sink) whose stages alternate between the
 * co-location keys "group1" and "group2", then checks that the resulting job graph has four
 * vertices, that each vertex has a co-location group, and that vertices 0/2 and 1/3 share one.
 */
@Test
public void testSettingCoLocationConstraint() throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(7);

    // Pipeline: input -> firstMap -> secondMap -> sink, with alternating co-location keys.
    DataStream<Long> input = environment.generateSequence(1L, 10_000_000);
    input.getTransformation().setCoLocationGroupKey("group1");

    DataStream<Long> firstMap = input.keyBy(value -> value).map(value -> value);
    firstMap.getTransformation().setCoLocationGroupKey("group2");

    DataStream<Long> secondMap = firstMap.keyBy(value -> value).map(value -> value);
    secondMap.getTransformation().setCoLocationGroupKey("group1");

    DataStreamSink<Long> sink = secondMap.keyBy(value -> value).addSink(new DiscardingSink<>());
    sink.getTransformation().setCoLocationGroupKey("group2");

    // Translate the program into a job graph and inspect its vertices.
    final JobGraph jobGraph = environment.getStreamGraph().getJobGraph();
    final int expectedVertexCount = 4;
    assertEquals(expectedVertexCount, jobGraph.getNumberOfVertices());

    List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    for (int i = 0; i < sortedVertices.size(); i++) {
        assertNotNull(sortedVertices.get(i).getCoLocationGroup());
    }

    // Stages that were given the same key must end up in the same co-location group.
    assertEquals(
            sortedVertices.get(0).getCoLocationGroup(),
            sortedVertices.get(2).getCoLocationGroup());
    assertEquals(
            sortedVertices.get(1).getCoLocationGroup(),
            sortedVertices.get(3).getCoLocationGroup());
}
use of org.apache.flink.streaming.api.datastream.DataStreamSink in project flink by apache.
the class StreamGraphCoLocationConstraintTest method testCoLocateDifferenSharingGroups.
/**
 * Builds a four-stage pipeline in which every stage has its own slot sharing group
 * ("ssg1".."ssg4") while stages alternate between the co-location keys "co1" and "co2", and
 * expects job graph generation to fail with an {@link IllegalStateException}.
 */
@Test
public void testCoLocateDifferenSharingGroups() throws Exception {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(7);

    // Pipeline: input -> firstMap -> secondMap -> sink.
    DataStream<Long> input = environment.generateSequence(1L, 10_000_000);
    input.getTransformation().setSlotSharingGroup("ssg1");
    input.getTransformation().setCoLocationGroupKey("co1");

    DataStream<Long> firstMap = input.keyBy(value -> value).map(value -> value);
    firstMap.getTransformation().setSlotSharingGroup("ssg2");
    firstMap.getTransformation().setCoLocationGroupKey("co2");

    DataStream<Long> secondMap = firstMap.keyBy(value -> value).map(value -> value);
    secondMap.getTransformation().setSlotSharingGroup("ssg3");
    secondMap.getTransformation().setCoLocationGroupKey("co1");

    DataStreamSink<Long> sink = secondMap.keyBy(value -> value).addSink(new DiscardingSink<>());
    sink.getTransformation().setSlotSharingGroup("ssg4");
    sink.getTransformation().setCoLocationGroupKey("co2");

    // Generating the job graph is the step under test.
    try {
        environment.getStreamGraph().getJobGraph();
        fail("exception expected");
    } catch (IllegalStateException expected) {
        // This exception is the outcome the test is looking for; nothing more to check.
    }
}
use of org.apache.flink.streaming.api.datastream.DataStreamSink in project flink by apache.
the class CommonExecLegacySink method translateToPlanInternal.
/**
 * Translates this legacy sink node into a {@link Transformation}.
 *
 * <p>For a {@code StreamTableSink}, the input is translated (with or without a change flag,
 * depending on the concrete sink kind), wrapped in a {@code DataStream}, handed to
 * {@code consumeDataStream}, and the legacy transformation of the returned sink is used as the
 * result. For a {@code DataStreamTableSink}, only the input is translated. Any other sink type
 * is rejected.
 *
 * @param planner planner passed to {@code translateToTransformation}; also supplies the
 *     execution environment for the intermediate {@code DataStream}
 * @param config node configuration passed to {@code translateToTransformation}
 * @return the resulting transformation
 * @throws TableException if the sink type is unsupported, if the sink kind does not match the
 *     changes of the table, or if {@code consumeDataStream} returns {@code null}
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<T> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (!(tableSink instanceof StreamTableSink)) {
        if (tableSink instanceof DataStreamTableSink) {
            // Not an actual table sink: translating the input is all that is required.
            final DataStreamTableSink<T> dataStreamTableSink = (DataStreamTableSink<T>) tableSink;
            return translateToTransformation(planner, config, dataStreamTableSink.withChangeFlag());
        }
        throw new TableException(String.format("Only Support StreamTableSink! However %s is not a StreamTableSink.", tableSink.getClass().getCanonicalName()));
    }

    // Validate/configure the sink for its kind and decide whether the input carries a change flag.
    final boolean withChangeFlag;
    if (tableSink instanceof RetractStreamTableSink) {
        withChangeFlag = true;
    } else if (tableSink instanceof UpsertStreamTableSink) {
        final UpsertStreamTableSink<T> upsert = (UpsertStreamTableSink<T>) tableSink;
        upsert.setIsAppendOnly(!needRetraction);
        // Without upsert keys, only a table that needs no retraction is acceptable.
        if (upsertKeys == null && needRetraction) {
            throw new TableException("UpsertStreamTableSink requires that Table has a full primary keys if it is updated.");
        }
        // Passes the keys when present; otherwise passes null (the no-retraction case).
        upsert.setKeyFields(upsertKeys);
        withChangeFlag = true;
    } else if (tableSink instanceof AppendStreamTableSink) {
        // An append sink cannot take a table that needs retraction.
        if (needRetraction) {
            throw new TableException("AppendStreamTableSink requires that Table has only insert changes.");
        }
        withChangeFlag = false;
    } else {
        if (isStreaming) {
            throw new TableException("Stream Tables can only be emitted by AppendStreamTableSink, " + "RetractStreamTableSink, or UpsertStreamTableSink.");
        }
        withChangeFlag = false;
    }

    final Transformation<T> inputTransformation = translateToTransformation(planner, config, withChangeFlag);
    final DataStream<T> inputStream = new DataStream<>(planner.getExecEnv(), inputTransformation);
    final StreamTableSink<T> streamTableSink = (StreamTableSink<T>) tableSink;
    final DataStreamSink<T> sink = (DataStreamSink<T>) streamTableSink.consumeDataStream(inputStream);
    if (sink != null) {
        return sink.getLegacyTransformation();
    }
    throw new TableException(String.format("The StreamTableSink#consumeDataStream(DataStream) must be implemented " + "and return the sink transformation DataStreamSink. " + "However, %s doesn't implement this method.", tableSink.getClass().getCanonicalName()));
}
Aggregations