Search in sources:

Example 6 with InLongMsg

use of org.apache.inlong.common.msg.InLongMsg in project incubator-inlong by apache.

the class InLongMsgCsvFormatDeserializerTest method testEmptyField.

/**
 * Checks that empty CSV fields, whether trailing or in the middle of the body, are expected
 * to come out of the deserializer as empty strings.
 *
 * <p>Two bodies are packed under the same attribute string; each expected row carries the
 * timestamp, the attribute map, the values {@code 1} and {@code 2} (matching the
 * {@code __addcol1__}/{@code __addcol2__} attributes) and then the body fields.
 */
@Test
public void testEmptyField() throws Exception {
    // Input: one InLongMsg holding two bodies that share the same head.
    final String head = "m=0&" + INLONGMSG_ATTR_STREAM_ID + "=testInterfaceId&t=20200322&__addcol1__=1&__addcol2__=2";
    final String trailingEmptyBody = "123,field11,field12,";
    final String middleEmptyBody = "123,field21,,field23";
    final InLongMsg message = InLongMsg.newInLongMsg();
    message.addMsg(head, trailingEmptyBody.getBytes());
    message.addMsg(head, middleEmptyBody.getBytes());

    // Expected: every key/value pair of the head shows up in the attribute map.
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("m", "0");
    attributes.put(INLONGMSG_ATTR_STREAM_ID, "testInterfaceId");
    attributes.put("t", "20200322");
    attributes.put("__addcol1__", "1");
    attributes.put("__addcol2__", "2");

    final Timestamp eventTime = Timestamp.valueOf("2020-03-22 00:00:00");
    final Row trailingEmptyRow = Row.of(eventTime, attributes, 1, 2, 123, "field11", "field12", "");
    final Row middleEmptyRow = Row.of(eventTime, attributes, 1, 2, 123, "field21", "", "field23");

    final InLongMsgCsvFormatDeserializer deserializer = new InLongMsgCsvFormatDeserializer(TEST_ROW_INFO);
    testRowDeserialization(deserializer, message.buildArray(), Arrays.asList(trailingEmptyRow, middleEmptyRow));
}
Also used : HashMap(java.util.HashMap) InLongMsg(org.apache.inlong.common.msg.InLongMsg) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 7 with InLongMsg

use of org.apache.inlong.common.msg.InLongMsg in project incubator-inlong by apache.

the class InLongMsgCsvFormatDeserializerTest method testRetainHeadDelimiter.

/**
 * Checks deserialization of a body that starts with the delimiter: the expected row has
 * {@code null} in the first data column, followed by the remaining body fields.
 *
 * <p>NOTE(review): the deserializer is built with the full constructor and two trailing
 * {@code false} flags; presumably one of them turns off head-delimiter removal. Confirm
 * against the constructor signature.
 */
@Test
public void testRetainHeadDelimiter() throws Exception {
    // Input: a single body with a leading delimiter.
    final String head = "m=0&" + INLONGMSG_ATTR_STREAM_ID + "=testInterfaceId&t=20200322";
    final String leadingDelimiterBody = ",1,2,field1,field2,field3";
    final InLongMsg message = InLongMsg.newInLongMsg();
    message.addMsg(head, leadingDelimiterBody.getBytes());

    // Expected: attributes parsed from the head, null for the empty leading field.
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("m", "0");
    attributes.put(INLONGMSG_ATTR_STREAM_ID, "testInterfaceId");
    attributes.put("t", "20200322");
    final Row expected = Row.of(Timestamp.valueOf("2020-03-22 00:00:00"), attributes, null, 1, 2, "field1", "field2", "field3");

    final String charsetName = Charset.defaultCharset().name();
    final InLongMsgCsvFormatDeserializer deserializer = new InLongMsgCsvFormatDeserializer(
            TEST_ROW_INFO,
            DEFAULT_TIME_FIELD_NAME,
            DEFAULT_ATTRIBUTES_FIELD_NAME,
            charsetName,
            TableFormatConstants.DEFAULT_DELIMITER,
            null,
            null,
            null,
            false,
            false);
    testRowDeserialization(deserializer, message.buildArray(), Collections.singletonList(expected));
}
Also used : HashMap(java.util.HashMap) InLongMsg(org.apache.inlong.common.msg.InLongMsg) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 8 with InLongMsg

use of org.apache.inlong.common.msg.InLongMsg in project incubator-inlong by apache.

the class AbstractInLongMsgFormatDeserializer method flatMap.

/**
 * Parses a serialized {@code InLongMsg} and emits one row per convertible body.
 *
 * <p>For every attribute string in the message the head is parsed once, then each non-empty
 * body stored under that attribute is parsed and converted. When {@code ignoreErrors} is
 * set, a failure while parsing the head, parsing a body or converting a row is logged and
 * the offending head/body is skipped; otherwise the exception is rethrown unchanged.
 *
 * @param bytes     the serialized InLongMsg
 * @param collector receives every non-null row produced by {@code convertRow}
 * @throws Exception if the message cannot be parsed, or if a head/body/row failure occurs
 *                   while {@code ignoreErrors} is disabled
 */
@Override
public void flatMap(byte[] bytes, Collector<Row> collector) throws Exception {
    InLongMsg message = InLongMsg.parseFrom(bytes);

    for (String attr : message.getAttrs()) {
        Iterator<byte[]> bodies = message.getIterator(attr);
        if (bodies == null) {
            // No bodies are registered for this attribute string.
            continue;
        }

        InLongMsgHead head;
        try {
            head = parseHead(attr);
        } catch (Exception e) {
            if (!ignoreErrors) {
                throw e;
            }
            LOG.warn("Cannot properly parse the head {}.", attr, e);
            // Without a head none of the bodies under this attribute can be converted.
            continue;
        }

        while (bodies.hasNext()) {
            byte[] rawBody = bodies.next();
            if (rawBody == null || rawBody.length == 0) {
                continue;
            }

            InLongMsgBody body;
            try {
                body = parseBody(rawBody);
            } catch (Exception e) {
                if (!ignoreErrors) {
                    throw e;
                }
                LOG.warn("Cannot properly parse the body {}.", Arrays.toString(rawBody), e);
                continue;
            }

            Row converted;
            try {
                converted = convertRow(head, body);
            } catch (Exception e) {
                if (!ignoreErrors) {
                    throw e;
                }
                LOG.warn("Cannot properly convert the inlongmsg ({}, {}) " + "to row.", head, body, e);
                continue;
            }

            // convertRow may return null; such results are not emitted.
            if (converted != null) {
                collector.collect(converted);
            }
        }
    }
}
Also used : InLongMsg(org.apache.inlong.common.msg.InLongMsg) Row(org.apache.flink.types.Row)

Example 9 with InLongMsg

use of org.apache.inlong.common.msg.InLongMsg in project incubator-inlong by apache.

the class SimpleMessageHandler method formatMessagesAndSend.

/**
 * Packs the grouped proxy messages into one {@code InLongMsg} per topic/stream-id pair,
 * builds the header map for that package and hands the headers together with the stream's
 * message list to {@code processProxyMessageList}.
 *
 * @param  commonAttrMap attributes shared by the whole request; read for the data time, the
 *                       message count, the sequence id and (for InLongMsg version 4) the
 *                       package time
 * @param  messageMap    messages grouped by topic (outer key) and stream id (inner key)
 * @param  strRemoteIP   value stored under the remote-IP header key
 * @param  msgType       message type; selects the InLongMsg version and how each message
 *                       is added to the package
 * @throws MessageIDException declared by the signature; not thrown directly in this method
 *                            (NOTE(review): presumably raised by processProxyMessageList)
 */
private void formatMessagesAndSend(Map<String, String> commonAttrMap, Map<String, HashMap<String, List<ProxyMessage>>> messageMap, String strRemoteIP, MsgType msgType) throws MessageIDException {
    final boolean multiBodyAttr = MsgType.MSG_MULTI_BODY_ATTR.equals(msgType);
    final boolean binMultiBody = MsgType.MSG_BIN_MULTI_BODY.equals(msgType);
    // Both multi-body flavours get a per-message count attribute of 1.
    final boolean stampMsgCount = multiBodyAttr || MsgType.MSG_MULTI_BODY.equals(msgType);
    // Version 3 for multi-body-with-attr, 4 for binary multi-body, 1 for everything else.
    final int inLongMsgVer = multiBodyAttr ? 3 : (binMultiBody ? 4 : 1);

    for (Map.Entry<String, HashMap<String, List<ProxyMessage>>> topicEntry : messageMap.entrySet()) {
        final String topic = topicEntry.getKey();
        for (Map.Entry<String, List<ProxyMessage>> streamIdEntry : topicEntry.getValue().entrySet()) {
            final String streamId = streamIdEntry.getKey();
            final List<ProxyMessage> proxyMessages = streamIdEntry.getValue();

            InLongMsg inLongMsg = InLongMsg.newInLongMsg(this.isCompressed, inLongMsgVer);
            for (ProxyMessage message : proxyMessages) {
                if (!stampMsgCount && binMultiBody) {
                    // Binary multi-body messages are added without an attribute string.
                    inLongMsg.addMsg(message.getData());
                    continue;
                }
                if (stampMsgCount) {
                    message.getAttributeMap().put(AttributeConstants.MESSAGE_COUNT, String.valueOf(1));
                }
                inLongMsg.addMsg(mapJoiner.join(message.getAttributeMap()), message.getData());
            }

            // Package time defaults to the creation time of the InLongMsg; version 4 prefers
            // the value supplied by the request and falls back to "now".
            final long createdAtMillis = inLongMsg.getCreatetime();
            String pkgTimeStr = dateFormator.get().format(createdAtMillis);
            if (inLongMsgVer == 4) {
                pkgTimeStr = commonAttrMap.containsKey(ConfigConstants.PKG_TIME_KEY)
                        ? commonAttrMap.get(ConfigConstants.PKG_TIME_KEY)
                        : dateFormator.get().format(System.currentTimeMillis());
            }

            final long dtTime = NumberUtils.toLong(commonAttrMap.get(AttributeConstants.DATA_TIME), System.currentTimeMillis());

            Map<String, String> headers = new HashMap<>();
            headers.put(AttributeConstants.DATA_TIME, String.valueOf(dtTime));
            headers.put(ConfigConstants.TOPIC_KEY, topic);
            headers.put(AttributeConstants.STREAM_ID, streamId);
            headers.put(ConfigConstants.REMOTE_IP_KEY, strRemoteIP);
            headers.put(ConfigConstants.REMOTE_IDC_KEY, DEFAULT_REMOTE_IDC_VALUE);

            // NOTE(review): every package reuses the request-level message count, which may be
            // absent (null); open question carried over from the original author: what happens
            // when msgType = 5?
            headers.put(ConfigConstants.MSG_COUNTER_KEY, commonAttrMap.get(AttributeConstants.MESSAGE_COUNT));

            // The serialized package is only used here to report its total length.
            final byte[] data = inLongMsg.buildArray();
            headers.put(ConfigConstants.TOTAL_LEN, String.valueOf(data.length));

            final String sequenceId = commonAttrMap.get(AttributeConstants.SEQUENCE_ID);
            if (StringUtils.isNotEmpty(sequenceId)) {
                // Sequence id header is topic, stream id and sequence id joined by SEPARATOR.
                final String qualifiedSequenceId = new StringBuilder()
                        .append(topic)
                        .append(SEPARATOR)
                        .append(streamId)
                        .append(SEPARATOR)
                        .append(sequenceId)
                        .toString();
                headers.put(ConfigConstants.SEQUENCE_ID, qualifiedSequenceId);
            }
            headers.put(ConfigConstants.PKG_TIME_KEY, pkgTimeStr);

            // process proxy message list
            this.processProxyMessageList(headers, proxyMessages);
        }
    }
}
Also used : ProxyMessage(org.apache.inlong.dataproxy.base.ProxyMessage) HashMap(java.util.HashMap) InLongMsg(org.apache.inlong.common.msg.InLongMsg) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 10 with InLongMsg

use of org.apache.inlong.common.msg.InLongMsg in project incubator-inlong by apache.

the class DeserializationFunctionTest method testProcessElement.

/**
 * Feeds a serialized InLongMsg with a single UTF-8 body through
 * {@code DeserializationFunction#processElement} and checks that the first collected row
 * has exactly one field holding the original body text.
 */
@Test
public void testProcessElement() throws Exception {
    final String payload = "testData";

    // Function under test: one string column named "content".
    final FieldInfo[] fields = new FieldInfo[] { new FieldInfo("content", StringFormatInfo.INSTANCE) };
    final DeserializationFunction function = new DeserializationFunction(
            DeserializationSchemaFactory.build(fields, null),
            new FieldMappingTransformer(new Configuration(), fields));

    // Input record wrapping the serialized InLongMsg.
    final InLongMsg message = InLongMsg.newInLongMsg();
    message.addMsg("m=12&iname=tid", payload.getBytes(StandardCharsets.UTF_8));
    final SerializedRecord record = new SerializedRecord(1, message.buildArray());

    final ListCollector<Row> rows = new ListCollector<>();
    function.processElement(record, null, rows);

    final Row first = rows.getInnerList().get(0);
    assertEquals(1, first.getArity());
    assertEquals(payload, first.getField(0));
}
Also used : SerializedRecord(org.apache.inlong.sort.singletenant.flink.SerializedRecord) Configuration(org.apache.inlong.sort.configuration.Configuration) InLongMsg(org.apache.inlong.common.msg.InLongMsg) Row(org.apache.flink.types.Row) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo) Test(org.junit.Test)

Aggregations

InLongMsg (org.apache.inlong.common.msg.InLongMsg): 16, HashMap (java.util.HashMap): 13, Row (org.apache.flink.types.Row): 11, Test (org.junit.Test): 11, SimpleDateFormat (java.text.SimpleDateFormat): 3, ArrayList (java.util.ArrayList): 2, List (java.util.List): 2, Map (java.util.Map): 2, ChannelException (org.apache.flume.ChannelException): 2, Event (org.apache.flume.Event): 2, ProxyMessage (org.apache.inlong.dataproxy.base.ProxyMessage): 2, FieldInfo (org.apache.inlong.sort.protocol.FieldInfo): 2, SerializedRecord (org.apache.inlong.sort.singletenant.flink.SerializedRecord): 2, IOException (java.io.IOException): 1, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1, Timestamp (java.sql.Timestamp): 1, Iterator (java.util.Iterator): 1, HttpServletRequest (javax.servlet.http.HttpServletRequest): 1, DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema): 1, ProcessFunction (org.apache.flink.streaming.api.functions.ProcessFunction): 1