Search in sources:

Example 1 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

The method deriveFormatInfo of the class TableFormatUtils.

/**
 * Derive the format information for the given type.
 *
 * <p>Scalar types map to their corresponding format info. The decimal mapping returns a
 * shared instance and the date/time mappings use no-argument constructors; the parameters of
 * the logical type (precision, scale, length) are not consulted here. Array, map and row
 * types are derived recursively from their element, key/value and field types.
 *
 * @param logicalType The type whose format is derived.
 * @return The format information for the given type.
 * @throws UnsupportedOperationException if the given type (or a type nested in it) has no
 *         mapping in this method; the message names the rejected type.
 */
public static FormatInfo deriveFormatInfo(LogicalType logicalType) {
    if (logicalType instanceof VarCharType) {
        return StringFormatInfo.INSTANCE;
    } else if (logicalType instanceof BooleanType) {
        return BooleanFormatInfo.INSTANCE;
    } else if (logicalType instanceof TinyIntType) {
        return ByteFormatInfo.INSTANCE;
    } else if (logicalType instanceof SmallIntType) {
        return ShortFormatInfo.INSTANCE;
    } else if (logicalType instanceof IntType) {
        return IntFormatInfo.INSTANCE;
    } else if (logicalType instanceof BigIntType) {
        return LongFormatInfo.INSTANCE;
    } else if (logicalType instanceof FloatType) {
        return FloatFormatInfo.INSTANCE;
    } else if (logicalType instanceof DoubleType) {
        return DoubleFormatInfo.INSTANCE;
    } else if (logicalType instanceof DecimalType) {
        return DecimalFormatInfo.INSTANCE;
    } else if (logicalType instanceof DateType) {
        return new DateFormatInfo();
    } else if (logicalType instanceof TimeType) {
        return new TimeFormatInfo();
    } else if (logicalType instanceof TimestampType) {
        return new TimestampFormatInfo();
    } else if (logicalType instanceof LocalZonedTimestampType) {
        return new LocalZonedTimestampFormatInfo();
    } else if (logicalType instanceof ArrayType) {
        // Recurse into the element type.
        ArrayType arrayType = (ArrayType) logicalType;
        return new ArrayFormatInfo(deriveFormatInfo(arrayType.getElementType()));
    } else if (logicalType instanceof MapType) {
        // Recurse into the key type first, then the value type.
        MapType mapType = (MapType) logicalType;
        FormatInfo keyFormatInfo = deriveFormatInfo(mapType.getKeyType());
        FormatInfo valueFormatInfo = deriveFormatInfo(mapType.getValueType());
        return new MapFormatInfo(keyFormatInfo, valueFormatInfo);
    } else if (logicalType instanceof RowType) {
        // Recurse into every field, keeping names and formats index-aligned.
        RowType rowType = (RowType) logicalType;
        List<RowType.RowField> rowFields = rowType.getFields();
        String[] fieldNames = new String[rowFields.size()];
        FormatInfo[] fieldFormatInfos = new FormatInfo[rowFields.size()];
        for (int i = 0; i < rowFields.size(); ++i) {
            RowType.RowField rowField = rowFields.get(i);
            fieldNames[i] = rowField.getName();
            fieldFormatInfos[i] = deriveFormatInfo(rowField.getType());
        }
        return new RowFormatInfo(fieldNames, fieldFormatInfos);
    } else if (logicalType instanceof BinaryType) {
        return BinaryFormatInfo.INSTANCE;
    } else if (logicalType instanceof NullType) {
        return NullFormatInfo.INSTANCE;
    } else {
        // Name the rejected type so the failure is diagnosable; a null argument also ends up here.
        throw new UnsupportedOperationException("Unsupported logical type: " + logicalType);
    }
}
Also used : MapFormatInfo(org.apache.inlong.sort.formats.common.MapFormatInfo) LocalZonedTimestampFormatInfo(org.apache.inlong.sort.formats.common.LocalZonedTimestampFormatInfo) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) BigIntType(org.apache.flink.table.types.logical.BigIntType) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrayFormatInfo(org.apache.inlong.sort.formats.common.ArrayFormatInfo) MapType(org.apache.flink.table.types.logical.MapType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) TimeType(org.apache.flink.table.types.logical.TimeType) ArrayType(org.apache.flink.table.types.logical.ArrayType) DateFormatInfo(org.apache.inlong.sort.formats.common.DateFormatInfo) LocalZonedTimestampFormatInfo(org.apache.inlong.sort.formats.common.LocalZonedTimestampFormatInfo) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) TimestampType(org.apache.flink.table.types.logical.TimestampType) List(java.util.List) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType) BinaryType(org.apache.flink.table.types.logical.BinaryType) BooleanType(org.apache.flink.table.types.logical.BooleanType) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) TimeFormatInfo(org.apache.inlong.sort.formats.common.TimeFormatInfo) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) DoubleType(org.apache.flink.table.types.logical.DoubleType) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) 
DecimalType(org.apache.flink.table.types.logical.DecimalType) NullType(org.apache.flink.table.types.logical.NullType) FormatInfo(org.apache.inlong.sort.formats.common.FormatInfo) BasicFormatInfo(org.apache.inlong.sort.formats.common.BasicFormatInfo) DoubleFormatInfo(org.apache.inlong.sort.formats.common.DoubleFormatInfo) BinaryFormatInfo(org.apache.inlong.sort.formats.common.BinaryFormatInfo) ArrayFormatInfo(org.apache.inlong.sort.formats.common.ArrayFormatInfo) BooleanFormatInfo(org.apache.inlong.sort.formats.common.BooleanFormatInfo) NullFormatInfo(org.apache.inlong.sort.formats.common.NullFormatInfo) IntFormatInfo(org.apache.inlong.sort.formats.common.IntFormatInfo) LocalZonedTimestampFormatInfo(org.apache.inlong.sort.formats.common.LocalZonedTimestampFormatInfo) DecimalFormatInfo(org.apache.inlong.sort.formats.common.DecimalFormatInfo) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) ShortFormatInfo(org.apache.inlong.sort.formats.common.ShortFormatInfo) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) FloatFormatInfo(org.apache.inlong.sort.formats.common.FloatFormatInfo) ByteFormatInfo(org.apache.inlong.sort.formats.common.ByteFormatInfo) TimeFormatInfo(org.apache.inlong.sort.formats.common.TimeFormatInfo) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) MapFormatInfo(org.apache.inlong.sort.formats.common.MapFormatInfo) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) DateFormatInfo(org.apache.inlong.sort.formats.common.DateFormatInfo)

Example 2 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

The method deserializeRowFormatInfo of the class TableFormatUtils.

/**
 * Reads the schema stored under {@code FORMAT_SCHEMA} in the given properties and
 * demarshalls it into a row format.
 *
 * @param descriptorProperties The properties of the descriptor.
 * @return The row format defined in the descriptor.
 * @throws ValidationException if reading or demarshalling the schema fails, or if the
 *         demarshalled format is not a {@link RowFormatInfo}; the underlying failure is
 *         attached as the cause.
 */
public static RowFormatInfo deserializeRowFormatInfo(DescriptorProperties descriptorProperties) {
    try {
        final FormatInfo demarshalled =
                FormatUtils.demarshall(descriptorProperties.getString(FORMAT_SCHEMA));
        if (demarshalled instanceof RowFormatInfo) {
            return (RowFormatInfo) demarshalled;
        }
        // Thrown inside the try on purpose: it is wrapped by the catch below like any other failure.
        throw new IllegalStateException("Unexpected format type.");
    } catch (Exception cause) {
        throw new ValidationException("The schema is invalid.", cause);
    }
}
Also used : ValidationException(org.apache.flink.table.api.ValidationException) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) FormatInfo(org.apache.inlong.sort.formats.common.FormatInfo) BasicFormatInfo(org.apache.inlong.sort.formats.common.BasicFormatInfo) DoubleFormatInfo(org.apache.inlong.sort.formats.common.DoubleFormatInfo) BinaryFormatInfo(org.apache.inlong.sort.formats.common.BinaryFormatInfo) ArrayFormatInfo(org.apache.inlong.sort.formats.common.ArrayFormatInfo) BooleanFormatInfo(org.apache.inlong.sort.formats.common.BooleanFormatInfo) NullFormatInfo(org.apache.inlong.sort.formats.common.NullFormatInfo) IntFormatInfo(org.apache.inlong.sort.formats.common.IntFormatInfo) LocalZonedTimestampFormatInfo(org.apache.inlong.sort.formats.common.LocalZonedTimestampFormatInfo) DecimalFormatInfo(org.apache.inlong.sort.formats.common.DecimalFormatInfo) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) ShortFormatInfo(org.apache.inlong.sort.formats.common.ShortFormatInfo) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) FloatFormatInfo(org.apache.inlong.sort.formats.common.FloatFormatInfo) ByteFormatInfo(org.apache.inlong.sort.formats.common.ByteFormatInfo) TimeFormatInfo(org.apache.inlong.sort.formats.common.TimeFormatInfo) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) MapFormatInfo(org.apache.inlong.sort.formats.common.MapFormatInfo) LongFormatInfo(org.apache.inlong.sort.formats.common.LongFormatInfo) DateFormatInfo(org.apache.inlong.sort.formats.common.DateFormatInfo) ValidationException(org.apache.flink.table.api.ValidationException)

Example 3 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

The method generateDeserializer of the class MultiTenancyInLongMsgMixedDeserializer.

/**
 * Builds the mixed-format deserializer and its matching converter for the given fields.
 *
 * <p>The row format is generated from {@code fields} first. Only
 * {@link InLongMsgCsvDeserializationInfo} is handled: its delimiter and delete-head-delimiter
 * flag configure the CSV deserializer, which uses the UTF-8 charset name.
 *
 * @param fields the fields from which the row format is generated
 * @param inLongMsgDeserializationInfo the deserialization info selecting the concrete format
 * @return a pair of the deserializer (left) and the converter (right)
 * @throws UnsupportedOperationException if the deserialization info is not a CSV one
 */
@VisibleForTesting
Pair<AbstractInLongMsgMixedFormatDeserializer, InLongMsgMixedFormatConverter> generateDeserializer(FieldInfo[] fields, InLongMsgDeserializationInfo inLongMsgDeserializationInfo) {
    final RowFormatInfo rowFormatInfo = CommonUtils.generateDeserializationRowFormatInfo(fields);

    // Guard: everything except the CSV flavour is rejected.
    if (!(inLongMsgDeserializationInfo instanceof InLongMsgCsvDeserializationInfo)) {
        throw new UnsupportedOperationException("Not supported yet " + inLongMsgDeserializationInfo.getClass().getSimpleName());
    }

    final InLongMsgCsvDeserializationInfo csvInfo = (InLongMsgCsvDeserializationInfo) inLongMsgDeserializationInfo;
    final AbstractInLongMsgMixedFormatDeserializer mixedDeserializer = new InLongMsgCsvMixedFormatDeserializer(
            StandardCharsets.UTF_8.name(),
            csvInfo.getDelimiter(),
            null,
            null,
            csvInfo.isDeleteHeadDelimiter(),
            false);
    final InLongMsgMixedFormatConverter mixedConverter = new InLongMsgCsvMixedFormatConverter(
            rowFormatInfo,
            DEFAULT_TIME_FIELD_NAME,
            DEFAULT_ATTRIBUTES_FIELD_NAME,
            null,
            false);
    return Pair.of(mixedDeserializer, mixedConverter);
}
Also used : InLongMsgMixedFormatConverter(org.apache.inlong.sort.formats.inlongmsg.InLongMsgMixedFormatConverter) InLongMsgCsvMixedFormatDeserializer(org.apache.inlong.sort.formats.inlongmsgcsv.InLongMsgCsvMixedFormatDeserializer) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) InLongMsgCsvMixedFormatConverter(org.apache.inlong.sort.formats.inlongmsgcsv.InLongMsgCsvMixedFormatConverter) InLongMsgCsvDeserializationInfo(org.apache.inlong.sort.protocol.deserialization.InLongMsgCsvDeserializationInfo) AbstractInLongMsgMixedFormatDeserializer(org.apache.inlong.sort.formats.inlongmsg.AbstractInLongMsgMixedFormatDeserializer) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

The method convertFieldInfosToLogicalType of the class CommonUtils.

/**
 * Converts the given field infos into a logical type.
 *
 * <p>The name and format info of every field are collected, in order, into a
 * {@link RowFormatInfo}, which is then passed to {@code deriveLogicalType}.
 *
 * @param fieldInfos the fields to convert
 * @return the logical type derived from the row format built from {@code fieldInfos}
 */
public static LogicalType convertFieldInfosToLogicalType(FieldInfo[] fieldInfos) {
    final String[] names = new String[fieldInfos.length];
    final FormatInfo[] formats = new FormatInfo[fieldInfos.length];
    int position = 0;
    for (FieldInfo fieldInfo : fieldInfos) {
        names[position] = fieldInfo.getName();
        formats[position] = fieldInfo.getFormatInfo();
        position++;
    }
    return deriveLogicalType(new RowFormatInfo(names, formats));
}
Also used : RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) FormatInfo(org.apache.inlong.sort.formats.common.FormatInfo) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) StringFormatInfo(org.apache.inlong.sort.formats.common.StringFormatInfo) TimeFormatInfo(org.apache.inlong.sort.formats.common.TimeFormatInfo) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo) DateFormatInfo(org.apache.inlong.sort.formats.common.DateFormatInfo)

Example 5 with RowFormatInfo

use of org.apache.inlong.sort.formats.common.RowFormatInfo in project incubator-inlong by apache.

The method createFormatDeserializer of the class KvFormatFactory.

/**
 * Creates a table format deserializer from the given properties.
 *
 * <p>The properties are validated first. The row format read from them is used to build the
 * deserialization schema, and the optional {@code FORMAT_IGNORE_ERRORS} setting (falling back
 * to {@code DEFAULT_IGNORE_ERRORS}) is passed on to the resulting deserializer.
 *
 * @param properties the raw format properties
 * @return a deserializer wrapping the schema built from the properties
 */
@Override
public TableFormatDeserializer createFormatDeserializer(Map<String, String> properties) {
    final DescriptorProperties validated = getValidatedProperties(properties);
    final KvDeserializationSchema schema =
            buildDeserializationSchema(validated, TableFormatUtils.getRowFormatInfo(validated));
    final boolean skipErrors = validated
            .getOptionalBoolean(TableFormatConstants.FORMAT_IGNORE_ERRORS)
            .orElse(TableFormatConstants.DEFAULT_IGNORE_ERRORS);
    return new DefaultTableFormatDeserializer(schema, skipErrors);
}
Also used : DefaultTableFormatDeserializer(org.apache.inlong.sort.formats.base.DefaultTableFormatDeserializer) DescriptorProperties(org.apache.flink.table.descriptors.DescriptorProperties) RowFormatInfo(org.apache.inlong.sort.formats.common.RowFormatInfo)

Aggregations

RowFormatInfo (org.apache.inlong.sort.formats.common.RowFormatInfo) 34, DescriptorProperties (org.apache.flink.table.descriptors.DescriptorProperties) 14, FormatInfo (org.apache.inlong.sort.formats.common.FormatInfo) 14, BasicFormatInfo (org.apache.inlong.sort.formats.common.BasicFormatInfo) 8, StringFormatInfo (org.apache.inlong.sort.formats.common.StringFormatInfo) 8, ArrayFormatInfo (org.apache.inlong.sort.formats.common.ArrayFormatInfo) 6, IntFormatInfo (org.apache.inlong.sort.formats.common.IntFormatInfo) 6, MapFormatInfo (org.apache.inlong.sort.formats.common.MapFormatInfo) 6, ValidationException (org.apache.flink.table.api.ValidationException) 5, BinaryFormatInfo (org.apache.inlong.sort.formats.common.BinaryFormatInfo) 5, BooleanFormatInfo (org.apache.inlong.sort.formats.common.BooleanFormatInfo) 5, ByteFormatInfo (org.apache.inlong.sort.formats.common.ByteFormatInfo) 5, DateFormatInfo (org.apache.inlong.sort.formats.common.DateFormatInfo) 5, DoubleFormatInfo (org.apache.inlong.sort.formats.common.DoubleFormatInfo) 5, NullFormatInfo (org.apache.inlong.sort.formats.common.NullFormatInfo) 5, ShortFormatInfo (org.apache.inlong.sort.formats.common.ShortFormatInfo) 5, TimeFormatInfo (org.apache.inlong.sort.formats.common.TimeFormatInfo) 5, TimestampFormatInfo (org.apache.inlong.sort.formats.common.TimestampFormatInfo) 5, Row (org.apache.flink.types.Row) 4, DecimalFormatInfo (org.apache.inlong.sort.formats.common.DecimalFormatInfo) 4