use of org.apache.inlong.sort.protocol.DataFlowInfo in project incubator-inlong by apache.
the class MetaManager method addDataFlow.
/**
 * Stores the given data flow under its id and notifies every registered listener.
 *
 * <p>The whole operation runs while holding {@code LOCK}. If no entry existed for the id,
 * listeners receive {@code addDataFlow}; otherwise the previous entry is replaced and
 * listeners receive {@code updateDataFlow}. An exception thrown by one listener is logged
 * and does not prevent the remaining listeners from being notified.
 *
 * @param dataFlowInfo the data flow to store; its id is used as the map key
 * @throws Exception declared by the signature; listener failures are caught and logged here
 */
public void addDataFlow(DataFlowInfo dataFlowInfo) throws Exception {
    synchronized (LOCK) {
        final long id = dataFlowInfo.getId();
        // put() returns the previous mapping, so null means this id was not known before.
        final boolean isNew = dataFlowInfoMap.put(id, dataFlowInfo) == null;
        if (isNew) {
            LOG.info("Try to add dataFlow {}", id);
        } else {
            LOG.warn("DataFlow {} should not be exist", id);
        }
        for (DataFlowInfoListener listener : dataFlowInfoListeners) {
            try {
                if (isNew) {
                    listener.addDataFlow(dataFlowInfo);
                } else {
                    listener.updateDataFlow(dataFlowInfo);
                }
            } catch (Exception e) {
                // Best effort: keep notifying the other listeners.
                LOG.warn(isNew
                        ? "Error happens when notifying listener data flow added"
                        : "Error happens when notifying listener data flow updated", e);
            }
        }
    }
}
use of org.apache.inlong.sort.protocol.DataFlowInfo in project incubator-inlong by apache.
the class MetaManagerTest method testInitializeDataFlow.
/**
 * Writes data flow 1 to ZooKeeper before the MetaManager is obtained, then registers a
 * listener and checks that it has recorded exactly one operation: a {@code DataFlowInfo}
 * with id 1.
 */
@Test(timeout = 30000)
public void testInitializeDataFlow() throws Exception {
    final String zkQuorum = ZOOKEEPER.getConnectString();

    final Configuration conf = new Configuration();
    conf.setString(Constants.CLUSTER_ID, cluster);
    conf.setString(Constants.ZOOKEEPER_QUORUM, zkQuorum);
    conf.setString(Constants.ZOOKEEPER_ROOT, zkRoot);

    // Publish the data flow before the manager is opened.
    ZkTools.addDataFlowToCluster(cluster, 1, zkQuorum, zkRoot);
    ZkTools.updateDataFlowInfo(prepareDataFlowInfo(1), cluster, 1, zkQuorum, zkRoot);

    // Open the manager, then attach the recording listener.
    final MetaManager manager = MetaManager.getInstance(conf);
    final TestDataFlowInfoListener listener = new TestDataFlowInfoListener();
    manager.registerDataFlowInfoListener(listener);

    final List<Object> recorded = listener.getOperations();
    assertEquals(1, recorded.size());
    final Object first = recorded.get(0);
    assertTrue(first instanceof DataFlowInfo);
    assertEquals(1, ((DataFlowInfo) first).getId());
}
use of org.apache.inlong.sort.protocol.DataFlowInfo in project incubator-inlong by apache.
the class Entrance method main.
/**
 * Program entry point: reads the configuration from the command-line arguments, loads the
 * data flow description from the configured file, wires the source, deserialization,
 * transformation and sink stages, and executes the job under the cluster id.
 *
 * @param args command-line arguments parsed by {@code ParameterTool}
 * @throws Exception if loading the data flow, building a stage, or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final Configuration config = ParameterTool.fromArgs(args).getConfiguration();
    final String jobName = checkNotNull(config.getString(Constants.CLUSTER_ID));
    final DataFlowInfo flow = getDataflowInfoFromFile(config.getString(Constants.DATAFLOW_INFO_FILE));
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // Checkpoint related: each setting is read right before it is applied.
    final int checkpointIntervalMs = config.getInteger(Constants.CHECKPOINT_INTERVAL_MS);
    environment.enableCheckpointing(checkpointIntervalMs);
    final int minPauseMs = config.getInteger(Constants.MIN_PAUSE_BETWEEN_CHECKPOINTS_MS);
    environment.getCheckpointConfig().setMinPauseBetweenCheckpoints(minPauseMs);
    final int checkpointTimeoutMs = config.getInteger(Constants.CHECKPOINT_TIMEOUT_MS);
    environment.getCheckpointConfig().setCheckpointTimeout(checkpointTimeoutMs);
    environment.getCheckpointConfig().setMaxConcurrentCheckpoints(1);

    // Pipeline: source -> deserialization -> transformation -> sink.
    final DataStream<SerializedRecord> serialized =
            buildSourceStream(environment, config, flow.getSourceInfo(), flow.getProperties());
    final DataStream<Row> deserialized =
            buildDeserializationStream(serialized, flow.getSourceInfo(), config);
    final DataStream<Row> transformed = buildTransformationStream(deserialized, flow, config);
    buildSinkStream(transformed, config, flow.getSinkInfo(), flow.getProperties(), flow.getId());

    environment.execute(jobName);
}
use of org.apache.inlong.sort.protocol.DataFlowInfo in project incubator-inlong by apache.
the class CommonOperateService method createDataFlow.
/**
 * Create dataflow info for sort.
 *
 * <p>Looks up the sources of the sink's group/stream, uses the first one, derives the field
 * mapping, source info, sink info and transformation info, and returns a
 * {@link DataFlowInfo} keyed by the sink id. The returned properties contain the sink's
 * properties (if any) plus the group id under {@code Constant.DATA_FLOW_GROUP_ID_KEY}.
 *
 * @param groupInfo the inlong group the sink belongs to
 * @param sinkResponse the sink to build the data flow for
 * @return the assembled data flow description
 * @throws WorkflowListenerException if no source exists for the sink's group and stream
 */
public DataFlowInfo createDataFlow(InlongGroupInfo groupInfo, SinkResponse sinkResponse) {
    final String groupId = sinkResponse.getInlongGroupId();
    final String streamId = sinkResponse.getInlongStreamId();

    // TODO Support all source type, include AUTO_PUSH.
    final List<SourceResponse> sources = streamSourceService.listSource(groupId, streamId);
    if (CollectionUtils.isEmpty(sources)) {
        throw new WorkflowListenerException(String.format("Source not found by groupId=%s and streamId=%s", groupId, streamId));
    }

    // Only Hive sinks contribute a primary partition; every other sink type leaves it null.
    final String partition = SinkType.forType(sinkResponse.getSinkType()) == SinkType.HIVE
            ? ((HiveSinkResponse) sinkResponse).getPrimaryPartition()
            : null;

    // TODO Support more than one source and one sink
    final SourceResponse source = sources.get(0);
    final boolean allMigration = SourceInfoUtils.isBinlogAllMigration(source);

    // createFieldInfo receives both lists as arguments; they are passed on to the
    // source/sink builders below.
    final List<FieldInfo> sourceFields = new ArrayList<>();
    final List<FieldInfo> sinkFields = new ArrayList<>();
    final FieldMappingRule mappingRule = FieldInfoUtils.createFieldInfo(
            allMigration, sinkResponse.getFieldList(), sourceFields, sinkFields, partition);

    // Source side
    final String tubeMaster = getSpecifiedParam(Constant.TUBE_MASTER_URL);
    final PulsarClusterInfo pulsar = getPulsarClusterInfo(groupInfo.getMiddlewareType());
    final InlongStreamInfo stream = streamService.get(groupId, streamId);
    final SourceInfo sourceInfo = SourceInfoUtils.createSourceInfo(
            pulsar, tubeMaster, clusterBean, groupInfo, stream, source, sourceFields);

    // Sink side
    final SinkInfo sinkInfo = SinkInfoUtils.createSinkInfo(source, sinkResponse, sinkFields);

    // Transformation
    final TransformationInfo transformation = new TransformationInfo(mappingRule);

    // Properties: sink properties first, then the group id entry.
    final Map<String, Object> props = new HashMap<>();
    if (MapUtils.isNotEmpty(sinkResponse.getProperties())) {
        props.putAll(sinkResponse.getProperties());
    }
    props.put(Constant.DATA_FLOW_GROUP_ID_KEY, groupId);

    return new DataFlowInfo(sinkResponse.getId(), sourceInfo, transformation, sinkInfo, props);
}
use of org.apache.inlong.sort.protocol.DataFlowInfo in project incubator-inlong by apache.
the class FieldMappingTransformerTest method testTransform.
/**
 * Registers a data flow whose source and sink both declare the fields
 * {@code not_important} and {@code id}, transforms a source row that carries
 * {@code SOURCE_FIELD_SKIP_STEP} leading fields plus those two values, and checks that the
 * resulting record keeps the data flow id and contains only the two declared values.
 */
@Test
public void testTransform() throws Exception {
    final long flowId = 1L;
    final FieldInfo idField = new FieldInfo("id", new LongFormatInfo());
    final FieldInfo extraField = new FieldInfo("not_important", new StringFormatInfo());

    final SourceInfo source = new TestingSourceInfo(new FieldInfo[] { extraField, idField });
    final SinkInfo sink = new TestingSinkInfo(new FieldInfo[] { extraField, idField });

    final FieldMappingTransformer transformer = new FieldMappingTransformer(new Configuration());
    transformer.addDataFlow(new DataFlowInfo(flowId, source, sink));

    // should be 4 fields (2 origin fields + time + attr)
    final Row input = new Row(2 + SOURCE_FIELD_SKIP_STEP);
    input.setField(0, System.currentTimeMillis());
    input.setField(1, "attr");
    input.setField(2, "not important");
    input.setField(3, 9527L);

    final Record transformed = transformer.transform(new Record(flowId, System.currentTimeMillis(), input));

    assertEquals(flowId, transformed.getDataflowId());
    final Row output = transformed.getRow();
    assertEquals(2, output.getArity());
    assertEquals("not important", output.getField(0));
    assertEquals(9527L, output.getField(1));
}
Aggregations