Use of org.apache.inlong.sort.flink.hive.HiveWriter in project incubator-inlong by apache:
class HiveMultiTenantWriter, method processElement.
@Override
public void processElement(SerializedRecord serializedRecord, Context context,
        Collector<PartitionCommitInfo> collector) throws Exception {
    final long dataFlowId = serializedRecord.getDataFlowId();
    synchronized (hiveWriters) {
        final HiveWriter hiveWriter = hiveWriters.get(dataFlowId);
        if (hiveWriter == null) {
            LOG.warn("Cannot get DataFlowInfo with id {}", dataFlowId);
            return;
        }
        // Deserialize the record and delegate it to the per-dataflow HiveWriter.
        hiveWriter.processElement(
                recordTransformer.toRecord(serializedRecord).getRow(),
                proxyContext.setContext(context),
                collector);
        // Report output-side audit metrics if auditing is enabled.
        if (auditImp != null) {
            Pair<String, String> groupIdAndStreamId = inLongGroupIdAndStreamIdMap
                    .getOrDefault(serializedRecord.getDataFlowId(), Pair.of("", ""));
            auditImp.add(Constants.METRIC_AUDIT_ID_FOR_OUTPUT,
                    groupIdAndStreamId.getLeft(),
                    groupIdAndStreamId.getRight(),
                    serializedRecord.getTimestampMillis(),
                    1,
                    serializedRecord.getData().length);
        }
    }
}
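The method above looks up a per-dataflow HiveWriter in a map guarded by a lock and silently skips records whose dataflow is unknown. Below is a minimal, standalone sketch of that lookup pattern; the class PerFlowRegistry and its register/lookup methods are hypothetical and only illustrate the synchronized registry idea, not the InLong API.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch: a per-dataflow writer registry guarded by a lock,
// mirroring the lookup-or-warn pattern in processElement above.
public class PerFlowRegistry<W> {

    private final Map<Long, W> writers = new HashMap<>();

    // Called when a dataflow is added, e.g. from a metadata update.
    public void register(long dataFlowId, W writer) {
        synchronized (writers) {
            writers.put(dataFlowId, writer);
        }
    }

    // Called on the hot path: callers skip records whose dataflow is unknown.
    public W lookup(long dataFlowId) {
        synchronized (writers) {
            return writers.get(dataFlowId);
        }
    }
}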
Use of org.apache.inlong.sort.flink.hive.HiveWriter in project incubator-inlong by apache:
class Entrance, method buildSinkStream.
private static void buildSinkStream(DataStream<Row> sourceStream, Configuration config,
        SinkInfo sinkInfo, Map<String, Object> properties, long dataflowId)
        throws IOException, ClassNotFoundException {
    final String sinkType = checkNotNull(config.getString(Constants.SINK_TYPE));
    final int sinkParallelism = config.getInteger(Constants.SINK_PARALLELISM);
    switch (sinkType) {
        case Constants.SINK_TYPE_CLICKHOUSE:
            checkState(sinkInfo instanceof ClickHouseSinkInfo);
            ClickHouseSinkInfo clickHouseSinkInfo = (ClickHouseSinkInfo) sinkInfo;
            sourceStream
                    .addSink(new ClickhouseRowSinkFunction(clickHouseSinkInfo))
                    .uid(Constants.SINK_UID)
                    .name("Clickhouse Sink")
                    .setParallelism(sinkParallelism);
            break;
        case Constants.SINK_TYPE_HIVE:
            checkState(sinkInfo instanceof HiveSinkInfo);
            HiveSinkInfo hiveSinkInfo = (HiveSinkInfo) sinkInfo;
            if (hiveSinkInfo.getPartitions().length == 0) {
                // The committer operator is not necessary if no partition exists.
                sourceStream
                        .process(new HiveWriter(config, dataflowId, hiveSinkInfo))
                        .uid(Constants.SINK_UID)
                        .name("Hive Sink")
                        .setParallelism(sinkParallelism);
            } else {
                sourceStream
                        .process(new HiveWriter(config, dataflowId, hiveSinkInfo))
                        .uid(Constants.SINK_UID)
                        .name("Hive Sink")
                        .setParallelism(sinkParallelism)
                        .addSink(new HiveCommitter(config, hiveSinkInfo))
                        .name("Hive Committer")
                        .setParallelism(1);
            }
            break;
        case Constants.SINK_TYPE_ICEBERG:
            checkState(sinkInfo instanceof IcebergSinkInfo);
            IcebergSinkInfo icebergSinkInfo = (IcebergSinkInfo) sinkInfo;
            TableLoader tableLoader = TableLoader.fromHadoopTable(
                    icebergSinkInfo.getTableLocation(), new org.apache.hadoop.conf.Configuration());
            FlinkSink.forRow(sourceStream, CommonUtils.getTableSchema(sinkInfo.getFields()))
                    .tableLoader(tableLoader)
                    .writeParallelism(sinkParallelism)
                    .build();
            break;
        case Constants.SINK_TYPE_KAFKA:
            checkState(sinkInfo instanceof KafkaSinkInfo);
            SerializationSchema<Row> schema = SerializationSchemaFactory.build(
                    sinkInfo.getFields(), ((KafkaSinkInfo) sinkInfo).getSerializationInfo());
            sourceStream
                    .addSink(buildKafkaSink((KafkaSinkInfo) sinkInfo, properties, schema, config))
                    .uid(Constants.SINK_UID)
                    .name("Kafka Sink")
                    .setParallelism(sinkParallelism);
            break;
        default:
            throw new IllegalArgumentException("Unsupported sink type " + sinkType);
    }
}
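The Hive branch above wires a HiveWriter process function into the stream and, only when partitions are configured, chains a HiveCommitter sink with parallelism 1 so partition commits are serialized. Below is a minimal sketch of that writer-plus-committer shape using plain Flink DataStream API; WriterCommitterSketch, FakeWriter, and FakeCommitter are hypothetical stand-ins, not InLong classes, and the operator names and parallelism values are illustrative only.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.util.Collector;

public class WriterCommitterSketch {

    // Stand-in for HiveWriter: turns each record into a "commit info" message.
    static class FakeWriter extends ProcessFunction<String, String> {
        @Override
        public void processElement(String value, Context ctx, Collector<String> out) {
            out.collect("commit:" + value);
        }
    }

    // Stand-in for HiveCommitter: runs with parallelism 1 so commits are serialized.
    static class FakeCommitter implements SinkFunction<String> {
        @Override
        public void invoke(String value, Context context) {
            System.out.println("finalizing " + value);
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> source = env.fromElements("a", "b", "c");
        source.process(new FakeWriter())
                .uid("sink_uid")
                .name("Writer")
                .setParallelism(2)
                .addSink(new FakeCommitter())
                .name("Committer")
                .setParallelism(1);
        env.execute("writer-committer-sketch");
    }
}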
Use of org.apache.inlong.sort.flink.hive.HiveWriter in project incubator-inlong by apache:
class HiveSinkWithoutPartitionTestCase, method test.
@Test(timeout = 60000)
public void test() throws Exception {
    config.setLong(Constants.SINK_HIVE_ROLLING_POLICY_CHECK_INTERVAL, 1000L);
    config.setLong(Constants.SINK_HIVE_ROLLING_POLICY_ROLLOVER_INTERVAL, 1000L);
    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    executorService.execute(() -> {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(1000L);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.addSource(new TestingSourceFunction())
                .setParallelism(1)
                .process(new HiveWriter(config, dataflowId, prepareSinkInfo()))
                .setParallelism(1);
        try {
            // will block here
            env.execute();
        } catch (Exception e) {
            LOG.error("Unexpected exception thrown", e);
        } finally {
            jobFinishedLatch.countDown();
        }
    });
    try {
        // Poll HDFS every second until the expected output file can be verified.
        boolean fileVerified = false;
        while (true) {
            if (!fileVerified) {
                fileVerified = verifyHdfsFile();
                // noinspection BusyWait
                Thread.sleep(1000);
            } else {
                break;
            }
        }
        verificationFinishedLatch.countDown();
        jobFinishedLatch.await();
    } finally {
        executorService.shutdown();
    }
}
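The test runs the blocking Flink job in a background thread, polls for the output file from the test thread, and uses two latches so the job is only allowed to finish after verification succeeds. Below is a minimal, standalone sketch of that coordination pattern using only the JDK; PollUntilVerifiedSketch and its fields are hypothetical, and the AtomicBoolean merely stands in for the real verifyHdfsFile() check.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

public class PollUntilVerifiedSketch {

    private static final CountDownLatch verificationFinished = new CountDownLatch(1);
    private static final CountDownLatch jobFinished = new CountDownLatch(1);
    private static final AtomicBoolean resultReady = new AtomicBoolean(false);

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.execute(() -> {
            try {
                // Stand-in for env.execute(): produce a result, then wait until
                // the main thread has verified it before finishing the "job".
                Thread.sleep(500);
                resultReady.set(true);
                verificationFinished.await();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                jobFinished.countDown();
            }
        });
        try {
            // Poll until the result is observable, mirroring verifyHdfsFile().
            while (!resultReady.get()) {
                Thread.sleep(100);
            }
            verificationFinished.countDown();
            jobFinished.await();
        } finally {
            executor.shutdown();
        }
    }
}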