Use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.
The class InLongMsgCsvFormatFactory, method createMixedFormatConverter.
@Override
public InLongMsgCsvMixedFormatConverter createMixedFormatConverter(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties(true);
    descriptorProperties.putProperties(properties);

    RowFormatInfo rowFormatInfo = getDataFormatInfo(descriptorProperties);
    String timeFieldName = descriptorProperties.getOptionalString(FORMAT_TIME_FIELD_NAME)
            .orElse(InLongMsgUtils.DEFAULT_TIME_FIELD_NAME);
    String attributesFieldName = descriptorProperties.getOptionalString(FORMAT_ATTRIBUTES_FIELD_NAME)
            .orElse(InLongMsgUtils.DEFAULT_ATTRIBUTES_FIELD_NAME);
    validateFieldNames(timeFieldName, attributesFieldName, rowFormatInfo);

    String nullLiteral = descriptorProperties.getOptionalString(FORMAT_NULL_LITERAL).orElse(null);
    boolean ignoreErrors = descriptorProperties.getOptionalBoolean(FORMAT_IGNORE_ERRORS)
            .orElse(DEFAULT_IGNORE_ERRORS);

    return new InLongMsgCsvMixedFormatConverter(
            rowFormatInfo, timeFieldName, attributesFieldName, nullLiteral, ignoreErrors);
}
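For reference, the converter can also be built directly when the row format is already known, without going through DescriptorProperties. The following is a minimal sketch only: the field names and types are hypothetical, and it simply reuses the two-argument RowFormatInfo constructor and the five-argument InLongMsgCsvMixedFormatConverter constructor that appear in the snippet above.
// Illustrative only: hypothetical field names/types, optional settings passed explicitly.
RowFormatInfo rowFormatInfo = new RowFormatInfo(
        new String[] { "id", "name" },
        new FormatInfo[] { new IntFormatInfo(), new StringFormatInfo() });
InLongMsgCsvMixedFormatConverter converter = new InLongMsgCsvMixedFormatConverter(
        rowFormatInfo,
        InLongMsgUtils.DEFAULT_TIME_FIELD_NAME,
        InLongMsgUtils.DEFAULT_ATTRIBUTES_FIELD_NAME,
        null,   // nullLiteral: no special null literal
        false); // ignoreErrors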
Use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.
The class InLongMsgUtils, method getDataFormatInfo.
public static RowFormatInfo getDataFormatInfo(DescriptorProperties descriptorProperties) {
    if (descriptorProperties.containsKey(TableFormatConstants.FORMAT_SCHEMA)) {
        // An explicitly configured format schema takes precedence.
        return TableFormatUtils.deserializeRowFormatInfo(descriptorProperties);
    } else {
        // Otherwise derive the row format from the table schema; only the fields
        // after the first two schema columns are treated as data fields.
        TableSchema tableSchema = deriveSchema(descriptorProperties.asMap());
        String[] fieldNames = tableSchema.getFieldNames();
        DataType[] fieldTypes = tableSchema.getFieldDataTypes();

        String[] dataFieldNames = new String[fieldNames.length - 2];
        FormatInfo[] dataFieldFormatInfos = new FormatInfo[fieldNames.length - 2];
        for (int i = 0; i < dataFieldNames.length; ++i) {
            dataFieldNames[i] = fieldNames[i + 2];
            dataFieldFormatInfos[i] = TableFormatUtils.deriveFormatInfo(fieldTypes[i + 2].getLogicalType());
        }
        return new RowFormatInfo(dataFieldNames, dataFieldFormatInfos);
    }
}
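Note that when no explicit format schema is configured, only the columns after the first two schema fields end up in the returned RowFormatInfo. As an illustration (hypothetical column names, using only constructors that appear elsewhere in this listing), a schema whose data columns are f1 of type string and f2 of type int corresponds to:
// Hand-built equivalent of the derived result for data columns f1:string, f2:int (illustrative only).
RowFormatInfo dataFormat = new RowFormatInfo(
        new String[] { "f1", "f2" },
        new FormatInfo[] { new StringFormatInfo(), new IntFormatInfo() });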
Use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.
The class MultiTenancyDeserializer, method generateDeserializer.
@VisibleForTesting
Deserializer<SerializedRecord, Record> generateDeserializer(FieldInfo[] fields, DeserializationInfo deserializationInfo) {
    final RowFormatInfo rowFormatInfo = CommonUtils.generateDeserializationRowFormatInfo(fields);
    final Deserializer<SerializedRecord, Record> deserializer;
    if (deserializationInfo instanceof InLongMsgCsvDeserializationInfo) {
        InLongMsgCsvDeserializationInfo inLongMsgCsvDeserializationInfo =
                (InLongMsgCsvDeserializationInfo) deserializationInfo;
        // CSV deserializer for the InLongMsg payload, parameterized with the derived row format
        // and the delimiter settings carried by the deserialization info.
        InLongMsgCsvFormatDeserializer inLongMsgCsvFormatDeserializer = new InLongMsgCsvFormatDeserializer(
                rowFormatInfo, DEFAULT_TIME_FIELD_NAME, DEFAULT_ATTRIBUTES_FIELD_NAME,
                TableFormatConstants.DEFAULT_CHARSET, inLongMsgCsvDeserializationInfo.getDelimiter(),
                null, null, null,
                inLongMsgCsvDeserializationInfo.isDeleteHeadDelimiter(), TableFormatConstants.DEFAULT_IGNORE_ERRORS);
        deserializer = new InLongMsgDeserializer(inLongMsgCsvFormatDeserializer);
    } else {
        // TODO, support more formats here
        throw new UnsupportedOperationException(
                "Not supported yet " + deserializationInfo.getClass().getSimpleName());
    }
    return deserializer;
}
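The RowFormatInfo consumed here comes from CommonUtils.generateDeserializationRowFormatInfo, which maps the configured FieldInfo array to a row format. A rough sketch of that input side, with hypothetical field names and types:
// Hypothetical field list; the helper call is the same one used in generateDeserializer above.
FieldInfo[] fields = new FieldInfo[] {
        new FieldInfo("user_id", new StringFormatInfo()),
        new FieldInfo("score", new IntFormatInfo()) };
RowFormatInfo rowFormatInfo = CommonUtils.generateDeserializationRowFormatInfo(fields);
// rowFormatInfo then parameterizes the InLongMsgCsvFormatDeserializer exactly as shown above.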
Use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.
The class CommonUtilsTest, method testBuildAvroRecordSchemaInJsonForRecursiveFields.
@Test
public void testBuildAvroRecordSchemaInJsonForRecursiveFields() throws IOException {
    FieldInfo[] testFieldInfos = new FieldInfo[] {
            new FieldInfo("f1", new ArrayFormatInfo(new MapFormatInfo(new StringFormatInfo(),
                    new ArrayFormatInfo(new ArrayFormatInfo(new ShortFormatInfo()))))),
            new FieldInfo("f2", new MapFormatInfo(new StringFormatInfo(), new MapFormatInfo(new StringFormatInfo(),
                    new RowFormatInfo(new String[] { "f21", "f22" },
                            new FormatInfo[] { new IntFormatInfo(), new ArrayFormatInfo(new ByteFormatInfo()) })))),
            new FieldInfo("f3", new RowFormatInfo(new String[] { "f31", "f32" },
                    new FormatInfo[] { new ArrayFormatInfo(new StringFormatInfo()),
                            new RowFormatInfo(new String[] { "f321", "f322" },
                                    new FormatInfo[] { new ArrayFormatInfo(new IntFormatInfo()),
                                            new MapFormatInfo(new StringFormatInfo(),
                                                    new ArrayFormatInfo(new ByteFormatInfo())) }) })) };
JsonNode expectedJsonNode = objectMapper.readTree("{\n" + " \"type\":\"record\",\n" + " \"name\":\"record\",\n" + " \"fields\":[\n" + " {\n" + " \"name\":\"f1\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"map\",\n" + " \"values\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " \"int\"\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " },\n" + " {\n" + " \"name\":\"f2\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"map\",\n" + " \"values\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"map\",\n" + " \"values\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"record\",\n" + " \"name\":\"record_f2\",\n" + " \"fields\":[\n" + " {\n" + " \"name\":\"f21\",\n" + " \"type\":[\n" + " \"null\",\n" + " \"int\"\n" + " ],\n" + " \"default\":null\n" + " },\n" + " {\n" + " \"name\":\"f22\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " \"int\"\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " }\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " },\n" + " {\n" + " \"name\":\"f3\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"record\",\n" + " \"name\":\"record_f3\",\n" + " \"fields\":[\n" + " {\n" + " \"name\":\"f31\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " \"string\"\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " },\n" + " {\n" + " \"name\":\"f32\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"record\",\n" + " \"name\":\"record_f3_f32\",\n" + " \"fields\":[\n" + " {\n" + " \"name\":\"f321\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " \"int\"\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " },\n" + " {\n" + " \"name\":\"f322\",\n" + " \"type\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"map\",\n" + " \"values\":[\n" + " \"null\",\n" + " {\n" + " \"type\":\"array\",\n" + " \"items\":[\n" + " \"null\",\n" + " \"int\"\n" + " ]\n" + " }\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " }\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " }\n" + " ]\n" + " }\n" + " ],\n" + " \"default\":null\n" + " }\n" + " ]\n" + "}");
    String actualJson = buildAvroRecordSchemaInJson(testFieldInfos);
    JsonNode actualJsonNode = objectMapper.readTree(actualJson);
    assertEquals(expectedJsonNode, actualJsonNode);
}
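The expected JSON above follows a consistent mapping: the top-level schema is a record named "record", every field becomes a nullable union with a null default, nested RowFormatInfo values become nested records named by their path (record_f2, record_f3_f32), and the small integer types (short, byte) map to Avro "int". A simpler sketch in the same spirit, not taken from the project's test suite, with hypothetical field names:
// Flat fields only; the expected shape is inferred from the pattern in the recursive test above.
FieldInfo[] flatFieldInfos = new FieldInfo[] {
        new FieldInfo("g1", new IntFormatInfo()),
        new FieldInfo("g2", new StringFormatInfo()) };
String flatJson = buildAvroRecordSchemaInJson(flatFieldInfos);
// Expected, per the conventions above: a record named "record" whose two fields are
// ["null","int"] and ["null","string"] unions, each with "default": null.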
Use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.
The class RowToAvroKafkaSinkTest, method prepareData.
@Override
protected void prepareData() throws IOException, ClassNotFoundException {
    fieldInfos = new FieldInfo[] {
            new FieldInfo("f1", new StringFormatInfo()),
            new FieldInfo("f2", new IntFormatInfo()),
            new FieldInfo("f3", new NullFormatInfo()),
            new FieldInfo("f4", new BinaryFormatInfo()),
            new FieldInfo("f5", new MapFormatInfo(new StringFormatInfo(),
                    new RowFormatInfo(new String[] { "f51", "f52" },
                            new FormatInfo[] { new IntFormatInfo(), new ArrayFormatInfo(new DoubleFormatInfo()) }))) };
    topic = "test_kafka_row_to_avro";
    serializationSchema = SerializationSchemaFactory.build(fieldInfos, new AvroSerializationInfo());
    prepareTestRows();
}
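The same factory call should work for a flat row without nested types. A minimal sketch with hypothetical field names; since the declared type of serializationSchema is not shown in this listing, the result is assigned to the same field as in prepareData above.
// Minimal flat schema (illustrative field names), reusing the factory and AvroSerializationInfo above.
FieldInfo[] flatFields = new FieldInfo[] {
        new FieldInfo("key", new StringFormatInfo()),
        new FieldInfo("value", new IntFormatInfo()) };
serializationSchema = SerializationSchemaFactory.build(flatFields, new AvroSerializationInfo());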