
Example 1 with HiveWriter

Use of org.apache.inlong.sort.flink.hive.HiveWriter in the Apache incubator-inlong project.

From the class HiveMultiTenantWriter, method processElement:

@Override
public void processElement(SerializedRecord serializedRecord, Context context,
        Collector<PartitionCommitInfo> collector) throws Exception {
    final long dataFlowId = serializedRecord.getDataFlowId();
    synchronized (hiveWriters) {
        // Look up the per-dataflow writer; records for unknown dataflows are dropped.
        final HiveWriter hiveWriter = hiveWriters.get(dataFlowId);
        if (hiveWriter == null) {
            LOG.warn("Cannot get DataFlowInfo with id {}", dataFlowId);
            return;
        }
        // Deserialize the record and delegate to the writer for this dataflow.
        hiveWriter.processElement(
                recordTransformer.toRecord(serializedRecord).getRow(),
                proxyContext.setContext(context),
                collector);
        // Report output metrics (record count and byte size) to the audit service.
        if (auditImp != null) {
            Pair<String, String> groupIdAndStreamId =
                    inLongGroupIdAndStreamIdMap.getOrDefault(dataFlowId, Pair.of("", ""));
            auditImp.add(
                    Constants.METRIC_AUDIT_ID_FOR_OUTPUT,
                    groupIdAndStreamId.getLeft(),
                    groupIdAndStreamId.getRight(),
                    serializedRecord.getTimestampMillis(),
                    1,
                    serializedRecord.getData().length);
        }
    }
}
Also used : HiveWriter(org.apache.inlong.sort.flink.hive.HiveWriter)
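
The pattern here is a per-dataflow registry: the operator holds one HiveWriter per dataflow id, routes each record to the matching writer under a shared lock, and drops records whose dataflow is unknown. Below is a minimal, self-contained sketch of that dispatch-by-id idea; DataflowDispatcher and its Writer interface are hypothetical names for illustration, not part of InLong.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of the per-dataflow dispatch pattern used above:
// writers are registered per dataflow id, records for unknown ids are dropped.
public class DataflowDispatcher {

    public interface Writer {
        void write(byte[] payload) throws Exception;
    }

    private final Map<Long, Writer> writers = new HashMap<>();

    public void register(long dataflowId, Writer writer) {
        synchronized (writers) {
            writers.put(dataflowId, writer);
        }
    }

    public void dispatch(long dataflowId, byte[] payload) throws Exception {
        synchronized (writers) {
            Writer writer = writers.get(dataflowId);
            if (writer == null) {
                // Mirrors the LOG.warn + return above: unknown dataflows are skipped.
                System.err.println("No writer registered for dataflow " + dataflowId);
                return;
            }
            writer.write(payload);
        }
    }

    public static void main(String[] args) throws Exception {
        DataflowDispatcher dispatcher = new DataflowDispatcher();
        dispatcher.register(42L, payload ->
                System.out.println("dataflow 42 got " + payload.length + " bytes"));
        dispatcher.dispatch(42L, new byte[]{1, 2, 3}); // routed to the writer
        dispatcher.dispatch(7L, new byte[]{4});        // unknown id, dropped
    }
}

A real implementation would also create and tear down writers as dataflows are added or removed; the sketch shows only the lookup-and-delegate step.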

Example 2 with HiveWriter

Use of org.apache.inlong.sort.flink.hive.HiveWriter in the Apache incubator-inlong project.

From the class Entrance, method buildSinkStream:

private static void buildSinkStream(DataStream<Row> sourceStream, Configuration config,
        SinkInfo sinkInfo, Map<String, Object> properties, long dataflowId)
        throws IOException, ClassNotFoundException {
    final String sinkType = checkNotNull(config.getString(Constants.SINK_TYPE));
    final int sinkParallelism = config.getInteger(Constants.SINK_PARALLELISM);
    switch (sinkType) {
        case Constants.SINK_TYPE_CLICKHOUSE:
            checkState(sinkInfo instanceof ClickHouseSinkInfo);
            ClickHouseSinkInfo clickHouseSinkInfo = (ClickHouseSinkInfo) sinkInfo;
            sourceStream.addSink(new ClickhouseRowSinkFunction(clickHouseSinkInfo))
                    .uid(Constants.SINK_UID)
                    .name("Clickhouse Sink")
                    .setParallelism(sinkParallelism);
            break;
        case Constants.SINK_TYPE_HIVE:
            checkState(sinkInfo instanceof HiveSinkInfo);
            HiveSinkInfo hiveSinkInfo = (HiveSinkInfo) sinkInfo;
            if (hiveSinkInfo.getPartitions().length == 0) {
                // The committer operator is not needed when the table has no partitions.
                sourceStream.process(new HiveWriter(config, dataflowId, hiveSinkInfo))
                        .uid(Constants.SINK_UID)
                        .name("Hive Sink")
                        .setParallelism(sinkParallelism);
            } else {
                // Partitioned tables chain the writer into a parallelism-1 committer
                // that publishes finished partitions.
                sourceStream.process(new HiveWriter(config, dataflowId, hiveSinkInfo))
                        .uid(Constants.SINK_UID)
                        .name("Hive Sink")
                        .setParallelism(sinkParallelism)
                        .addSink(new HiveCommitter(config, hiveSinkInfo))
                        .name("Hive Committer")
                        .setParallelism(1);
            }
            break;
        case Constants.SINK_TYPE_ICEBERG:
            checkState(sinkInfo instanceof IcebergSinkInfo);
            IcebergSinkInfo icebergSinkInfo = (IcebergSinkInfo) sinkInfo;
            TableLoader tableLoader = TableLoader.fromHadoopTable(
                    icebergSinkInfo.getTableLocation(),
                    new org.apache.hadoop.conf.Configuration());
            FlinkSink.forRow(sourceStream, CommonUtils.getTableSchema(sinkInfo.getFields()))
                    .tableLoader(tableLoader)
                    .writeParallelism(sinkParallelism)
                    .build();
            break;
        case Constants.SINK_TYPE_KAFKA:
            checkState(sinkInfo instanceof KafkaSinkInfo);
            SerializationSchema<Row> schema = SerializationSchemaFactory.build(
                    sinkInfo.getFields(), ((KafkaSinkInfo) sinkInfo).getSerializationInfo());
            sourceStream.addSink(buildKafkaSink((KafkaSinkInfo) sinkInfo, properties, schema, config))
                    .uid(Constants.SINK_UID)
                    .name("Kafka Sink")
                    .setParallelism(sinkParallelism);
            break;
        default:
            throw new IllegalArgumentException("Unsupported sink type " + sinkType);
    }
}
Also used : HiveWriter(org.apache.inlong.sort.flink.hive.HiveWriter), IcebergSinkInfo(org.apache.inlong.sort.protocol.sink.IcebergSinkInfo), ClickhouseRowSinkFunction(org.apache.inlong.sort.singletenant.flink.clickhouse.ClickhouseRowSinkFunction), ClickHouseSinkInfo(org.apache.inlong.sort.protocol.sink.ClickHouseSinkInfo), HiveCommitter(org.apache.inlong.sort.flink.hive.HiveCommitter), HiveSinkInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo), KafkaSinkInfo(org.apache.inlong.sort.protocol.sink.KafkaSinkInfo), Row(org.apache.flink.types.Row), TableLoader(org.apache.iceberg.flink.TableLoader)
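
The Hive branch illustrates a common Flink topology decision: an unpartitioned table needs only the writer, while a partitioned table chains the writer into a parallelism-1 committer that publishes finished partitions. Below is a minimal sketch of that writer-plus-committer shape, assuming a Flink 1.x DataStream API; PartitionWriter and PartitionCommitter are stand-ins for illustration, not the actual HiveWriter and HiveCommitter classes.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.util.Collector;

// Hypothetical sketch of the writer + committer topology chosen for
// partitioned tables above.
public class WriterCommitterSketch {

    // Writes records and emits the name of each "partition" it has completed.
    static class PartitionWriter extends ProcessFunction<String, String> {
        @Override
        public void processElement(String value, Context ctx, Collector<String> out) {
            // A real writer would buffer rows and roll files; here we pretend
            // every record closes a partition named after its first character.
            out.collect("partition-" + value.charAt(0));
        }
    }

    // Runs at parallelism 1 so partition commits are serialized.
    static class PartitionCommitter implements SinkFunction<String> {
        @Override
        public void invoke(String partition, Context context) {
            System.out.println("committing " + partition);
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> source = env.fromElements("alpha", "beta", "gamma");
        source.process(new PartitionWriter()).name("Writer").setParallelism(2)
                .addSink(new PartitionCommitter()).name("Committer").setParallelism(1);
        env.execute("writer-committer-sketch");
    }
}

Pinning the committer to parallelism 1 serializes metadata updates, which is why the example above sets setParallelism(1) on the HiveCommitter regardless of the writer's parallelism.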

Example 3 with HiveWriter

Use of org.apache.inlong.sort.flink.hive.HiveWriter in the Apache incubator-inlong project.

From the class HiveSinkWithoutPartitionTestCase, method test:

@Test(timeout = 60000)
public void test() throws Exception {
    // Roll files quickly so output becomes visible within the test timeout.
    config.setLong(Constants.SINK_HIVE_ROLLING_POLICY_CHECK_INTERVAL, 1000L);
    config.setLong(Constants.SINK_HIVE_ROLLING_POLICY_ROLLOVER_INTERVAL, 1000L);
    final ExecutorService executorService = Executors.newSingleThreadExecutor();
    executorService.execute(() -> {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(1000L);
        env.setRestartStrategy(RestartStrategies.noRestart());
        env.addSource(new TestingSourceFunction())
                .setParallelism(1)
                .process(new HiveWriter(config, dataflowId, prepareSinkInfo()))
                .setParallelism(1);
        try {
            // Blocks until the job terminates.
            env.execute();
        } catch (Exception e) {
            LOG.error("Unexpected exception thrown", e);
        } finally {
            jobFinishedLatch.countDown();
        }
    });
    try {
        // Poll until the expected output file appears on HDFS.
        boolean fileVerified = false;
        while (!fileVerified) {
            fileVerified = verifyHdfsFile();
            // noinspection BusyWait
            Thread.sleep(1000);
        }
        verificationFinishedLatch.countDown();
        jobFinishedLatch.await();
    } finally {
        executorService.shutdown();
    }
}
Also used : HiveWriter(org.apache.inlong.sort.flink.hive.HiveWriter), ExecutorService(java.util.concurrent.ExecutorService), StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), IOException(java.io.IOException), Test(org.junit.Test)
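
The coordination in this test is the interesting part: env.execute() blocks, so the job runs on a background executor while the test thread polls for the output file, with latches tying the two together. Below is a generic sketch of that run-in-background-and-poll pattern; runJob and checkResult are hypothetical stand-ins for env.execute() and verifyHdfsFile().

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.BooleanSupplier;

// Hypothetical sketch: run a blocking job on a background thread, poll for an
// observable result, then wait for the job to finish before shutting down.
public class PollUntilVerified {

    public static void main(String[] args) throws Exception {
        CountDownLatch jobFinished = new CountDownLatch(1);
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.execute(() -> {
            try {
                runJob(); // blocks until the job ends, like env.execute()
            } finally {
                jobFinished.countDown();
            }
        });
        try {
            awaitCondition(PollUntilVerified::checkResult, 1000L);
            jobFinished.await();
        } finally {
            executor.shutdown();
        }
    }

    // Polls the condition once per interval until it reports success.
    static void awaitCondition(BooleanSupplier condition, long intervalMillis)
            throws InterruptedException {
        while (!condition.getAsBoolean()) {
            Thread.sleep(intervalMillis);
        }
    }

    static void runJob() { /* stand-in for the blocking Flink job */ }

    static boolean checkResult() { return true; } // stand-in for verifyHdfsFile()
}

In a real test the poll loop should also be bounded, which @Test(timeout = 60000) provides in the example above.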

Aggregations

HiveWriter (org.apache.inlong.sort.flink.hive.HiveWriter): 3 usages
IOException (java.io.IOException): 1 usage
ExecutorService (java.util.concurrent.ExecutorService): 1 usage
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 1 usage
Row (org.apache.flink.types.Row): 1 usage
TableLoader (org.apache.iceberg.flink.TableLoader): 1 usage
HiveCommitter (org.apache.inlong.sort.flink.hive.HiveCommitter): 1 usage
ClickHouseSinkInfo (org.apache.inlong.sort.protocol.sink.ClickHouseSinkInfo): 1 usage
HiveSinkInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo): 1 usage
IcebergSinkInfo (org.apache.inlong.sort.protocol.sink.IcebergSinkInfo): 1 usage
KafkaSinkInfo (org.apache.inlong.sort.protocol.sink.KafkaSinkInfo): 1 usage
ClickhouseRowSinkFunction (org.apache.inlong.sort.singletenant.flink.clickhouse.ClickhouseRowSinkFunction): 1 usage
Test (org.junit.Test): 1 usage