Search in sources:

Example 1 with SourceInfo

use of org.apache.inlong.sort.protocol.source.SourceInfo in project incubator-inlong by apache.

the class SourceInfoUtils method createSourceInfo.

/**
 * Builds the {@link SourceInfo} of a data flow, choosing the concrete source by the
 * middleware type configured on the group.
 *
 * <p>Pulsar and TDMQ Pulsar groups are delegated to {@code createPulsarSourceInfo}; Tube
 * groups are delegated to {@code createTubeSourceInfo}. The deserialization info is derived
 * from the source response and the stream info before the middleware type is inspected.
 *
 * @param pulsarCluster  Pulsar cluster info, passed through to the Pulsar branch only
 * @param masterAddress  master address, passed through to the Tube branch only
 * @param clusterBean    cluster bean passed to both branches
 * @param groupInfo      group whose middleware type selects the branch
 * @param streamInfo     stream info, used for deserialization info and the Pulsar branch
 * @param sourceResponse source definition used to build the deserialization info
 * @param sourceFields   source field list handed to the created source info
 * @return the source info created by the selected branch
 * @throws WorkflowListenerException if the middleware type is neither Pulsar, TDMQ Pulsar nor Tube
 */
public static SourceInfo createSourceInfo(PulsarClusterInfo pulsarCluster, String masterAddress, ClusterBean clusterBean, InlongGroupInfo groupInfo, InlongStreamInfo streamInfo, SourceResponse sourceResponse, List<FieldInfo> sourceFields) {
    final String middleware = groupInfo.getMiddlewareType();
    final DeserializationInfo deserialization = SerializationUtils.createDeserialInfo(sourceResponse, streamInfo);

    final boolean pulsarFamily = Constant.MIDDLEWARE_PULSAR.equals(middleware)
            || Constant.MIDDLEWARE_TDMQ_PULSAR.equals(middleware);
    if (pulsarFamily) {
        return createPulsarSourceInfo(pulsarCluster, clusterBean, groupInfo, streamInfo, deserialization, sourceFields);
    }
    if (Constant.MIDDLEWARE_TUBE.equals(middleware)) {
        return createTubeSourceInfo(groupInfo, masterAddress, clusterBean, deserialization, sourceFields);
    }
    // Anything else is a configuration this factory does not know how to handle.
    throw new WorkflowListenerException(String.format("Unsupported middleware {%s}", middleware));
}
Also used : DeserializationInfo(org.apache.inlong.sort.protocol.deserialization.DeserializationInfo) TubeSourceInfo(org.apache.inlong.sort.protocol.source.TubeSourceInfo) SourceInfo(org.apache.inlong.sort.protocol.source.SourceInfo) PulsarSourceInfo(org.apache.inlong.sort.protocol.source.PulsarSourceInfo) TDMQPulsarSourceInfo(org.apache.inlong.sort.protocol.source.TDMQPulsarSourceInfo) WorkflowListenerException(org.apache.inlong.manager.common.exceptions.WorkflowListenerException)

Example 2 with SourceInfo

use of org.apache.inlong.sort.protocol.source.SourceInfo in project incubator-inlong by apache.

the class CommonOperateService method createDataFlow.

/**
 * Assembles the {@link DataFlowInfo} that is handed to sort for the given sink.
 *
 * <p>Only the first source found for the sink's group/stream is used. The source and sink
 * field lists are filled by {@code FieldInfoUtils.createFieldInfo} and must therefore be
 * built before the source info and sink info that consume them.
 *
 * @param groupInfo    group the sink belongs to; its middleware type selects the source kind
 * @param sinkResponse sink definition providing ids, field list and extra properties
 * @return the data flow info, identified by the sink id
 * @throws WorkflowListenerException if no source exists for the sink's group and stream
 */
public DataFlowInfo createDataFlow(InlongGroupInfo groupInfo, SinkResponse sinkResponse) {
    final String inlongGroupId = sinkResponse.getInlongGroupId();
    final String inlongStreamId = sinkResponse.getInlongStreamId();

    // TODO Support all source type, include AUTO_PUSH.
    final List<SourceResponse> sources = streamSourceService.listSource(inlongGroupId, inlongStreamId);
    if (CollectionUtils.isEmpty(sources)) {
        throw new WorkflowListenerException(String.format("Source not found by groupId=%s and streamId=%s", inlongGroupId, inlongStreamId));
    }

    // Only Hive sinks carry a primary partition; every other sink type leaves it null.
    final String primaryPartition = SinkType.forType(sinkResponse.getSinkType()) == SinkType.HIVE
            ? ((HiveSinkResponse) sinkResponse).getPrimaryPartition()
            : null;

    // TODO Support more than one source and one sink
    final SourceResponse firstSource = sources.get(0);

    // Field mapping: createFieldInfo fills both lists passed to it.
    final List<FieldInfo> sourceFieldList = new ArrayList<>();
    final List<FieldInfo> sinkFieldList = new ArrayList<>();
    final boolean allMigration = SourceInfoUtils.isBinlogAllMigration(firstSource);
    final FieldMappingRule mappingRule = FieldInfoUtils.createFieldInfo(allMigration, sinkResponse.getFieldList(), sourceFieldList, sinkFieldList, primaryPartition);

    // Source side
    final String tubeMasterAddress = getSpecifiedParam(Constant.TUBE_MASTER_URL);
    final PulsarClusterInfo pulsarClusterInfo = getPulsarClusterInfo(groupInfo.getMiddlewareType());
    final InlongStreamInfo inlongStream = streamService.get(inlongGroupId, inlongStreamId);
    final SourceInfo source = SourceInfoUtils.createSourceInfo(pulsarClusterInfo, tubeMasterAddress, clusterBean, groupInfo, inlongStream, firstSource, sourceFieldList);

    // Sink side
    final SinkInfo sink = SinkInfoUtils.createSinkInfo(firstSource, sinkResponse, sinkFieldList);

    // Transformation
    final TransformationInfo transformation = new TransformationInfo(mappingRule);

    // Properties: sink-defined entries first, then the group id key on top.
    final Map<String, Object> flowProperties = new HashMap<>();
    if (MapUtils.isNotEmpty(sinkResponse.getProperties())) {
        flowProperties.putAll(sinkResponse.getProperties());
    }
    flowProperties.put(Constant.DATA_FLOW_GROUP_ID_KEY, inlongGroupId);

    return new DataFlowInfo(sinkResponse.getId(), source, transformation, sink, flowProperties);
}
Also used : SourceResponse(org.apache.inlong.manager.common.pojo.source.SourceResponse) FieldMappingRule(org.apache.inlong.sort.protocol.transformation.FieldMappingRule) SourceInfo(org.apache.inlong.sort.protocol.source.SourceInfo) HashMap(java.util.HashMap) PulsarClusterInfo(org.apache.inlong.common.pojo.dataproxy.PulsarClusterInfo) ArrayList(java.util.ArrayList) SinkInfo(org.apache.inlong.sort.protocol.sink.SinkInfo) HiveSinkResponse(org.apache.inlong.manager.common.pojo.sink.hive.HiveSinkResponse) WorkflowListenerException(org.apache.inlong.manager.common.exceptions.WorkflowListenerException) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) InlongStreamInfo(org.apache.inlong.manager.common.pojo.stream.InlongStreamInfo) TransformationInfo(org.apache.inlong.sort.protocol.transformation.TransformationInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo)

Example 3 with SourceInfo

use of org.apache.inlong.sort.protocol.source.SourceInfo in project incubator-inlong by apache.

the class FieldMappingTransformerTest method testTransform.

/**
 * Verifies that the transformer returns a sink row holding just the two declared
 * fields, i.e. without the leading time and attribute fields of the source row.
 */
@Test
public void testTransform() throws Exception {
    final long flowId = 1L;
    final FieldInfo idField = new FieldInfo("id", new LongFormatInfo());
    final FieldInfo fillerField = new FieldInfo("not_important", new StringFormatInfo());

    final SourceInfo source = new TestingSourceInfo(new FieldInfo[] { fillerField, idField });
    final SinkInfo sink = new TestingSinkInfo(new FieldInfo[] { fillerField, idField });

    final FieldMappingTransformer mapper = new FieldMappingTransformer(new Configuration());
    mapper.addDataFlow(new DataFlowInfo(flowId, source, sink));

    // Source row layout: time, attr, then the 2 origin fields -> 4 fields in total.
    final Row input = new Row(2 + SOURCE_FIELD_SKIP_STEP);
    input.setField(0, System.currentTimeMillis());
    input.setField(1, "attr");
    input.setField(2, "not important");
    input.setField(3, 9527L);

    final Record output = mapper.transform(new Record(flowId, System.currentTimeMillis(), input));
    final Row outputRow = output.getRow();

    assertEquals(flowId, output.getDataflowId());
    assertEquals(2, outputRow.getArity());
    assertEquals("not important", outputRow.getField(0));
    assertEquals(9527L, outputRow.getField(1));
}
Also used : TestingSourceInfo(org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo) SourceInfo(org.apache.inlong.sort.protocol.source.SourceInfo) TestingSourceInfo(org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo) Configuration(org.apache.inlong.sort.configuration.Configuration) SinkInfo(org.apache.inlong.sort.protocol.sink.SinkInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Record(org.apache.inlong.sort.flink.Record) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Row(org.apache.flink.types.Row) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) BuiltInFieldInfo(org.apache.inlong.sort.protocol.BuiltInFieldInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo) Test(org.junit.Test)

Example 4 with SourceInfo

use of org.apache.inlong.sort.protocol.source.SourceInfo in project incubator-inlong by apache.

the class FieldMappingTransformerTest method testTransformWithDt.

/**
 * Verifies that a built-in {@code DATA_TIME} field declared on source and sink comes
 * out of the transformer as a {@link Timestamp} of the record's time.
 */
@Test
public void testTransformWithDt() throws Exception {
    final long flowId = 1L;
    final FieldInfo idField = new FieldInfo("id", new LongFormatInfo());
    final FieldInfo dataTimeField = new BuiltInFieldInfo("dt", new TimestampFormatInfo(), BuiltInField.DATA_TIME);

    final SourceInfo source = new TestingSourceInfo(new FieldInfo[] { idField, dataTimeField });
    final SinkInfo sink = new TestingSinkInfo(new FieldInfo[] { idField, dataTimeField });

    final FieldMappingTransformer mapper = new FieldMappingTransformer(new Configuration());
    mapper.addDataFlow(new DataFlowInfo(flowId, source, sink));

    // Source row layout: time, attr, then the single origin field -> 3 fields in total.
    final long eventTime = System.currentTimeMillis();
    final Row input = new Row(1 + SOURCE_FIELD_SKIP_STEP);
    input.setField(0, eventTime);
    input.setField(1, "attr");
    input.setField(2, 9527L);

    final Record output = mapper.transform(new Record(flowId, eventTime, input));
    final Row outputRow = output.getRow();

    assertEquals(flowId, output.getDataflowId());
    assertEquals(2, outputRow.getArity());
    assertEquals(9527L, outputRow.getField(0));
    assertEquals(new Timestamp(eventTime), outputRow.getField(1));
}
Also used : TestingSourceInfo(org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo) SourceInfo(org.apache.inlong.sort.protocol.source.SourceInfo) TestingSourceInfo(org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo) Configuration(org.apache.inlong.sort.configuration.Configuration) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) SinkInfo(org.apache.inlong.sort.protocol.sink.SinkInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Timestamp(java.sql.Timestamp) BuiltInFieldInfo(org.apache.inlong.sort.protocol.BuiltInFieldInfo) Record(org.apache.inlong.sort.flink.Record) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) TestingSinkInfo(org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo) Row(org.apache.flink.types.Row) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) BuiltInFieldInfo(org.apache.inlong.sort.protocol.BuiltInFieldInfo) DataFlowInfo(org.apache.inlong.sort.protocol.DataFlowInfo) Test(org.junit.Test)

Aggregations

SourceInfo (org.apache.inlong.sort.protocol.source.SourceInfo)4 DataFlowInfo (org.apache.inlong.sort.protocol.DataFlowInfo)3 FieldInfo (org.apache.inlong.sort.protocol.FieldInfo)3 SinkInfo (org.apache.inlong.sort.protocol.sink.SinkInfo)3 Row (org.apache.flink.types.Row)2 WorkflowListenerException (org.apache.inlong.manager.common.exceptions.WorkflowListenerException)2 Configuration (org.apache.inlong.sort.configuration.Configuration)2 Record (org.apache.inlong.sort.flink.Record)2 LongFormatInfo (org.apache.inlong.sort.formats.common.LongFormatInfo)2 BuiltInFieldInfo (org.apache.inlong.sort.protocol.BuiltInFieldInfo)2 TestingSinkInfo (org.apache.inlong.sort.util.TestingUtils.TestingSinkInfo)2 TestingSourceInfo (org.apache.inlong.sort.util.TestingUtils.TestingSourceInfo)2 Test (org.junit.Test)2 Timestamp (java.sql.Timestamp)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 PulsarClusterInfo (org.apache.inlong.common.pojo.dataproxy.PulsarClusterInfo)1 HiveSinkResponse (org.apache.inlong.manager.common.pojo.sink.hive.HiveSinkResponse)1 SourceResponse (org.apache.inlong.manager.common.pojo.source.SourceResponse)1 InlongStreamInfo (org.apache.inlong.manager.common.pojo.stream.InlongStreamInfo)1