Search in sources :

Example 16 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

the class KvFormatFactory method createDeserializationSchema.

/**
 * Creates the KV deserialization schema described by the given properties.
 *
 * <p>The raw properties are first passed through {@code getValidatedProperties};
 * the row format is then read from the validated properties and both are handed
 * to {@code buildDeserializationSchema}.
 *
 * @param properties the format properties as string key/value pairs
 * @return the deserialization schema built from the validated properties
 */
@Override
public KvDeserializationSchema createDeserializationSchema(Map<String, String> properties) {
    final DescriptorProperties validated = getValidatedProperties(properties);
    return buildDeserializationSchema(validated, TableFormatUtils.getRowFormatInfo(validated));
}
Also used : DescriptorProperties(org.apache.flink.table.descriptors.DescriptorProperties) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo)

Example 17 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

the class KvFormatFactory method buildSerializationSchema.

/**
 * Builds a {@code KvSerializationSchema} for the given row format, applying every
 * optional format setting that is present in the descriptor properties.
 *
 * @param descriptorProperties the properties the optional settings are read from
 * @param rowFormatInfo the row format to serialize; every field format must be a
 *        {@code BasicFormatInfo}
 * @return the configured serialization schema
 * @throws ValidationException if any field format is not a {@code BasicFormatInfo}
 */
private static KvSerializationSchema buildSerializationSchema(DescriptorProperties descriptorProperties, RowFormatInfo rowFormatInfo) {
    // Reject unsupported field formats before any builder is created.
    final FormatInfo[] fieldFormats = rowFormatInfo.getFieldFormatInfos();
    for (int idx = 0; idx < fieldFormats.length; idx++) {
        if (fieldFormats[idx] instanceof BasicFormatInfo) {
            continue;
        }
        throw new ValidationException("Currently only basic formats " + "are supported in kv formats.");
    }

    final KvSerializationSchema.Builder schemaBuilder = new KvSerializationSchema.Builder(rowFormatInfo);

    // Each setting is optional: the builder is only touched when the property exists.
    descriptorProperties.getOptionalString(TableFormatConstants.FORMAT_CHARSET)
            .ifPresent(charset -> schemaBuilder.setCharset(charset));
    descriptorProperties.getOptionalCharacter(TableFormatConstants.FORMAT_ENTRY_DELIMITER)
            .ifPresent(delimiter -> schemaBuilder.setEntryDelimiter(delimiter));
    descriptorProperties.getOptionalCharacter(TableFormatConstants.FORMAT_KV_DELIMITER)
            .ifPresent(delimiter -> schemaBuilder.setKvDelimiter(delimiter));
    descriptorProperties.getOptionalCharacter(TableFormatConstants.FORMAT_ESCAPE_CHARACTER)
            .ifPresent(escape -> schemaBuilder.setEscapeCharacter(escape));
    descriptorProperties.getOptionalCharacter(TableFormatConstants.FORMAT_QUOTE_CHARACTER)
            .ifPresent(quote -> schemaBuilder.setQuoteCharacter(quote));
    descriptorProperties.getOptionalString(TableFormatConstants.FORMAT_NULL_LITERAL)
            .ifPresent(literal -> schemaBuilder.setNullLiteral(literal));

    return schemaBuilder.build();
}
Also used : ValidationException(org.apache.flink.table.api.ValidationException) FormatInfo(org.apache.inlong.sort.formats.common.FormatInfo) BasicFormatInfo(org.apache.inlong.sort.formats.common.BasicFormatInfo) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo)

Example 18 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

the class KvDeserializationSchemaTest method testBasicDeserialization.

/**
 * Deserializes {@code text} with a single-field KV schema and checks the result.
 *
 * <p>The schema has exactly one field named {@code "f"} whose format is
 * {@code basicFormatInfo}. The caller may adjust the builder through
 * {@code config} before the schema is built.
 *
 * @param config callback that customizes the deserialization schema builder
 * @param basicFormatInfo the format of the single field {@code "f"}
 * @param expectedRecord the value expected in field 0 of the deserialized row
 * @param text the textual record to deserialize
 * @param <T> the Java type of the field value
 * @throws IOException if deserialization fails
 */
private static <T> void testBasicDeserialization(Consumer<KvDeserializationSchema.Builder> config, BasicFormatInfo<T> basicFormatInfo, T expectedRecord, String text) throws IOException {
    RowFormatInfo rowFormatInfo = new RowFormatInfo(new String[] { "f" }, new FormatInfo[] { basicFormatInfo });
    KvDeserializationSchema.Builder builder = new KvDeserializationSchema.Builder(rowFormatInfo);
    config.accept(builder);
    KvDeserializationSchema deserializer = builder.build();
    // Encode with an explicit charset so the input bytes do not depend on the
    // platform default of the JVM running the test.
    // NOTE(review): assumes the schema decodes UTF-8 unless a caller configures
    // another charset through `config` — confirm against the schema's default.
    byte[] input = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    Row row = deserializer.deserialize(input);
    assertEquals(1, row.getArity());
    assertEquals(expectedRecord, row.getField(0));
}
Also used : RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) Row(org.apache.flink.types.Row)

Example 19 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

the class InLongMsgCsvUtils method buildRow.

/**
 * Assembles a row from a timestamp, an attribute map and the textual field values.
 *
 * <p>The resulting row has {@code 2 + n} positions, where {@code n} is the number
 * of field names in {@code rowFormatInfo}: position 0 holds {@code time},
 * position 1 holds {@code attributes}, and the remaining positions hold the
 * deserialized values of {@code predefinedFields} followed by {@code fields}.
 * Input values beyond the declared field count are dropped; declared fields with
 * no input value are set to {@code null}. A warning is logged whenever the number
 * of input values differs from the number of declared fields.
 *
 * @param rowFormatInfo supplies the field names and their formats
 * @param nullLiteral passed through to the field deserializer
 * @param time value stored at position 0
 * @param attributes value stored at position 1
 * @param predefinedFields textual values for the leading declared fields
 * @param fields textual values for the declared fields after the predefined ones
 * @return the assembled row
 */
public static Row buildRow(RowFormatInfo rowFormatInfo, String nullLiteral, Timestamp time, Map<String, String> attributes, List<String> predefinedFields, List<String> fields) {
    final String[] fieldNames = rowFormatInfo.getFieldNames();
    final FormatInfo[] fieldFormatInfos = rowFormatInfo.getFieldFormatInfos();

    final int declaredCount = fieldNames.length;
    final int predefinedCount = predefinedFields.size();
    final int suppliedCount = predefinedCount + fields.size();

    if (declaredCount != suppliedCount) {
        LOG.warn("The number of fields mismatches: " + fieldNames.length + " expected, but was " + suppliedCount + ".");
    }

    final Row result = new Row(declaredCount + 2);
    result.setField(0, time);
    result.setField(1, attributes);

    // Walk the declared positions that have an input value; the value comes from
    // the predefined list first and from the regular list afterwards.
    final int usableCount = Math.min(suppliedCount, declaredCount);
    for (int pos = 0; pos < usableCount; pos++) {
        final String text = pos < predefinedCount
                ? predefinedFields.get(pos)
                : fields.get(pos - predefinedCount);
        final Object value = TableFormatUtils.deserializeBasicField(fieldNames[pos], fieldFormatInfos[pos], text, nullLiteral);
        result.setField(pos + 2, value);
    }

    // Declared fields without any input value are explicitly set to null.
    for (int pos = suppliedCount; pos < declaredCount; pos++) {
        result.setField(pos + 2, null);
    }
    return result;
}
Also used : Row(org.apache.flink.types.Row) FormatInfo(org.apache.inlong.sort.formats.common.FormatInfo) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo)

Example 20 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

the class CsvFormatFactory method createFormatSerializer.

/**
 * Creates a table format serializer for the CSV format described by the given
 * properties.
 *
 * <p>The properties are passed through {@code getValidatedProperties}; the row
 * format and the serialization schema are then built from the validated
 * properties. The ignore-errors flag is read from
 * {@code TableFormatConstants.FORMAT_IGNORE_ERRORS} and falls back to
 * {@code TableFormatConstants.DEFAULT_IGNORE_ERRORS} when the property is absent.
 *
 * @param properties the format properties as string key/value pairs
 * @return a {@code DefaultTableFormatSerializer} wrapping the CSV serialization schema
 */
@Override
public TableFormatSerializer createFormatSerializer(Map<String, String> properties) {
    final DescriptorProperties validated = getValidatedProperties(properties);
    final CsvSerializationSchema schema =
            buildSerializationSchema(validated, TableFormatUtils.getRowFormatInfo(validated));
    final boolean ignoreErrors = validated
            .getOptionalBoolean(TableFormatConstants.FORMAT_IGNORE_ERRORS)
            .orElse(TableFormatConstants.DEFAULT_IGNORE_ERRORS);
    return new DefaultTableFormatSerializer(schema, ignoreErrors);
}
Also used : DescriptorProperties(org.apache.flink.table.descriptors.DescriptorProperties) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) DefaultTableFormatSerializer(org.apache.inlong.sort.formats.base.DefaultTableFormatSerializer)

Aggregations

- RowFormatInfo (org.apache.inlong.sort.formats.common.RowFormatInfo): 34
- DescriptorProperties (org.apache.flink.table.descriptors.DescriptorProperties): 14
- FormatInfo (org.apache.inlong.sort.formats.common.FormatInfo): 14
- BasicFormatInfo (org.apache.inlong.sort.formats.common.BasicFormatInfo): 8
- StringFormatInfo (org.apache.inlong.sort.formats.common.StringFormatInfo): 8
- ArrayFormatInfo (org.apache.inlong.sort.formats.common.ArrayFormatInfo): 6
- IntFormatInfo (org.apache.inlong.sort.formats.common.IntFormatInfo): 6
- MapFormatInfo (org.apache.inlong.sort.formats.common.MapFormatInfo): 6
- ValidationException (org.apache.flink.table.api.ValidationException): 5
- BinaryFormatInfo (org.apache.inlong.sort.formats.common.BinaryFormatInfo): 5
- BooleanFormatInfo (org.apache.inlong.sort.formats.common.BooleanFormatInfo): 5
- ByteFormatInfo (org.apache.inlong.sort.formats.common.ByteFormatInfo): 5
- DateFormatInfo (org.apache.inlong.sort.formats.common.DateFormatInfo): 5
- DoubleFormatInfo (org.apache.inlong.sort.formats.common.DoubleFormatInfo): 5
- NullFormatInfo (org.apache.inlong.sort.formats.common.NullFormatInfo): 5
- ShortFormatInfo (org.apache.inlong.sort.formats.common.ShortFormatInfo): 5
- TimeFormatInfo (org.apache.inlong.sort.formats.common.TimeFormatInfo): 5
- TimestampFormatInfo (org.apache.inlong.sort.formats.common.TimestampFormatInfo): 5
- Row (org.apache.flink.types.Row): 4
- DecimalFormatInfo (org.apache.inlong.sort.formats.common.DecimalFormatInfo): 4