
Example 51 with Field

use of org.apache.kafka.connect.data.Field in project ksql by confluentinc.

the class SchemaKStream method rekeyRequired.

private boolean rekeyRequired(List<Expression> groupByExpressions) {
    Field keyField = getKeyField();
    if (keyField == null) {
        return true;
    }
    String keyFieldName = SchemaUtil.getFieldNameWithNoAlias(keyField);
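    // No rekey is needed only when grouping by a single expression that matches the current key field.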
    return !(groupByExpressions.size() == 1 && fieldNameFromExpression(groupByExpressions.get(0)).equals(keyFieldName));
}
Also used : Field(org.apache.kafka.connect.data.Field)
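
For context, a self-contained sketch of the decision this method encodes; the key field name and GROUP BY columns below are hypothetical:

import java.util.Arrays;
import java.util.List;

public class RekeyCheckSketch {

    // Mirrors rekeyRequired: only a single GROUP BY column that matches the
    // current key field avoids a repartition.
    static boolean rekeyRequired(String keyFieldName, List<String> groupByColumns) {
        return !(groupByColumns.size() == 1 && groupByColumns.get(0).equals(keyFieldName));
    }

    public static void main(String[] args) {
        System.out.println(rekeyRequired("USERID", Arrays.asList("USERID")));           // false
        System.out.println(rekeyRequired("USERID", Arrays.asList("REGION")));           // true
        System.out.println(rekeyRequired("USERID", Arrays.asList("USERID", "REGION"))); // true
    }
}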

Example 52 with Field

use of org.apache.kafka.connect.data.Field in project ksql by confluentinc.

the class SchemaKStream method groupBy.

public SchemaKGroupedStream groupBy(final Serde<String> keySerde, final Serde<GenericRow> valSerde, final List<Expression> groupByExpressions) {
    boolean rekey = rekeyRequired(groupByExpressions);
    if (!rekey) {
        KGroupedStream kgroupedStream = kstream.groupByKey(Serialized.with(keySerde, valSerde));
        return new SchemaKGroupedStream(schema, kgroupedStream, keyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
    }
    // Collect the column indexes, and build the new key as <column1>+<column2>+...
    StringBuilder aggregateKeyName = new StringBuilder();
    List<Integer> newKeyIndexes = new ArrayList<>();
    boolean addSeparator = false;
    for (Expression groupByExpr : groupByExpressions) {
        if (addSeparator) {
            aggregateKeyName.append("|+|");
        } else {
            addSeparator = true;
        }
        aggregateKeyName.append(groupByExpr.toString());
        newKeyIndexes.add(SchemaUtil.getIndexInSchema(groupByExpr.toString(), getSchema()));
    }
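    // Drop null rows, then re-key each remaining row on the composite string key.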
    KGroupedStream kgroupedStream = kstream.filter((key, value) -> value != null).groupBy((key, value) -> {
        StringBuilder newKey = new StringBuilder();
        boolean addSeparator1 = false;
        for (int index : newKeyIndexes) {
            if (addSeparator1) {
                newKey.append("|+|");
            } else {
                addSeparator1 = true;
            }
            newKey.append(String.valueOf(value.getColumns().get(index)));
        }
        return newKey.toString();
    }, Serialized.with(keySerde, valSerde));
    // TODO: if the key is a prefix of the grouping columns then we can
    // use the repartition reflection hack to tell streams not to
    // repartition.
    Field newKeyField = new Field(aggregateKeyName.toString(), -1, Schema.STRING_SCHEMA);
    return new SchemaKGroupedStream(schema, kgroupedStream, newKeyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
}
Also used : Arrays(java.util.Arrays) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Produced(org.apache.kafka.streams.kstream.Produced) SchemaRegistryClient(io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) Serialized(org.apache.kafka.streams.kstream.Serialized) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Pair(io.confluent.ksql.util.Pair) Serde(org.apache.kafka.common.serialization.Serde) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) ExpressionMetadata(io.confluent.ksql.util.ExpressionMetadata) Serdes(org.apache.kafka.common.serialization.Serdes) CodeGenRunner(io.confluent.ksql.codegen.CodeGenRunner) SchemaUtil(io.confluent.ksql.util.SchemaUtil) OutputNode(io.confluent.ksql.planner.plan.OutputNode) Field(org.apache.kafka.connect.data.Field) FunctionRegistry(io.confluent.ksql.function.FunctionRegistry) Set(java.util.Set) KsqlConfig(io.confluent.ksql.util.KsqlConfig) Expression(io.confluent.ksql.parser.tree.Expression) List(java.util.List) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) GenericRow(io.confluent.ksql.GenericRow) Optional(java.util.Optional) KsqlException(io.confluent.ksql.util.KsqlException) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) Collections(java.util.Collections) GenericRowValueTypeEnforcer(io.confluent.ksql.util.GenericRowValueTypeEnforcer)
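
To make the key format concrete, a self-contained sketch of the "|+|"-joined composite key built by the groupBy mapper above; the row values and column indexes are hypothetical:

import java.util.Arrays;
import java.util.List;

public class CompositeKeySketch {

    public static void main(String[] args) {
        List<Object> columns = Arrays.asList("a", 42);     // one row's column values
        List<Integer> newKeyIndexes = Arrays.asList(0, 1); // positions of the GROUP BY columns
        StringBuilder newKey = new StringBuilder();
        for (int index : newKeyIndexes) {
            if (newKey.length() > 0) {
                newKey.append("|+|");
            }
            newKey.append(String.valueOf(columns.get(index)));
        }
        System.out.println(newKey); // prints "a|+|42"
    }
}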

Example 53 with Field

use of org.apache.kafka.connect.data.Field in project ksql by confluentinc.

the class AggregateNode method buildAggregateSchema.

private Schema buildAggregateSchema(final Schema schema, final FunctionRegistry functionRegistry) {
    final SchemaBuilder schemaBuilder = SchemaBuilder.struct();
    final List<Field> fields = schema.fields();
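    // Copy the required (non-aggregate) columns, which occupy the leading positions of the input schema.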
    for (int i = 0; i < getRequiredColumnList().size(); i++) {
        schemaBuilder.field(fields.get(i).name(), fields.get(i).schema());
    }
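    // Append one field per aggregate function, named with the internal prefix plus an index and typed by the UDAF's return type.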
    for (int aggFunctionVarSuffix = 0; aggFunctionVarSuffix < getFunctionList().size(); aggFunctionVarSuffix++) {
        String udafName = getFunctionList().get(aggFunctionVarSuffix).getName().getSuffix();
        KsqlAggregateFunction aggregateFunction = functionRegistry.getAggregateFunction(udafName, getFunctionList().get(aggFunctionVarSuffix).getArguments(), schema);
        schemaBuilder.field(AggregateExpressionRewriter.AGGREGATE_FUNCTION_VARIABLE_PREFIX + aggFunctionVarSuffix, aggregateFunction.getReturnType());
    }
    return schemaBuilder.build();
}
Also used : KsqlAggregateFunction(io.confluent.ksql.function.KsqlAggregateFunction) Field(org.apache.kafka.connect.data.Field) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder)
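
A hedged sketch of the schema shape this method produces, assuming AGGREGATE_FUNCTION_VARIABLE_PREFIX resolves to "KSQL_AGG_VARIABLE_" and a query with two pass-through columns plus one COUNT aggregate (the column names are hypothetical; imports of Schema and SchemaBuilder from org.apache.kafka.connect.data are assumed):

Schema aggregateSchema = SchemaBuilder.struct()
    // required (non-aggregate) columns come first, in schema order
    .field("ITEMID", Schema.STRING_SCHEMA)
    .field("REGION", Schema.STRING_SCHEMA)
    // one column per aggregate function, typed by the UDAF's return type
    .field("KSQL_AGG_VARIABLE_0", Schema.INT64_SCHEMA)
    .build();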

Example 54 with Field

use of org.apache.kafka.connect.data.Field in project ksql by confluentinc.

the class KsqlGenericRowAvroDeserializer method deserialize.

@SuppressWarnings("unchecked")
@Override
public GenericRow deserialize(final String topic, final byte[] bytes) {
    if (bytes == null) {
        return null;
    }
    GenericRow genericRow;
    try {
        GenericRecord genericRecord = (GenericRecord) kafkaAvroDeserializer.deserialize(topic, bytes);
        Map<String, String> caseInsensitiveFieldNameMap = getCaseInsensitiveFieldMap(genericRecord);
        List<Object> columns = new ArrayList<>();
        for (Field field : schema.fields()) {
            Object avroValue = genericRecord.get(caseInsensitiveFieldNameMap.get(field.name().toUpperCase()));
            // Set the missing fields to null. We can make this configurable later.
            if (avroValue == null) {
                columns.add(null);
            } else {
                columns.add(enforceFieldType(field.schema(), avroValue));
            }
        }
        genericRow = new GenericRow(columns);
    } catch (Exception e) {
        throw new SerializationException(e);
    }
    return genericRow;
}
Also used : GenericRow(io.confluent.ksql.GenericRow) Field(org.apache.kafka.connect.data.Field) SerializationException(org.apache.kafka.common.errors.SerializationException) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) KsqlException(io.confluent.ksql.util.KsqlException)
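
getCaseInsensitiveFieldMap is not shown in this snippet; a hedged sketch of what it plausibly does, given the upper-cased lookups above, is to index the Avro record's field names by their upper-cased form:

import java.util.HashMap;
import java.util.Map;
import org.apache.avro.generic.GenericRecord;

// Hypothetical reconstruction: map UPPER-CASED Avro field names back to their
// original spelling so Connect field lookups are case-insensitive.
private static Map<String, String> getCaseInsensitiveFieldMap(GenericRecord record) {
    Map<String, String> map = new HashMap<>();
    for (org.apache.avro.Schema.Field avroField : record.getSchema().getFields()) {
        map.put(avroField.name().toUpperCase(), avroField.name());
    }
    return map;
}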

Example 55 with Field

use of org.apache.kafka.connect.data.Field in project connect-utils by jcustenborder.

the class ValueHelper method value.

public static Object value(Schema schema, Object value) {
    if (null == value) {
        return null;
    }
    Object result;
    log.trace("schema.type() = {}", schema.type());
    switch(schema.type()) {
        case BYTES:
            if (Decimal.LOGICAL_NAME.equals(schema.name())) {
                result = decimal(schema, value);
            } else {
                result = bytes(value);
            }
            break;
        case INT32:
            if (Date.LOGICAL_NAME.equals(schema.name())) {
                result = date(schema, value);
            } else if (Time.LOGICAL_NAME.equals(schema.name())) {
                result = time(schema, value);
            } else {
                result = int32(value);
            }
            break;
        case INT16:
            result = int16(value);
            break;
        case INT64:
            if (Timestamp.LOGICAL_NAME.equals(schema.name())) {
                result = timestamp(schema, value);
            } else {
                result = int64(value);
            }
            break;
        case INT8:
            result = int8(value);
            break;
        case FLOAT32:
            result = float32(value);
            break;
        case FLOAT64:
            result = float64(value);
            break;
        case STRUCT:
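            // A STRUCT may arrive as a Struct, as a Struct serialized into a Map (marked by "schema"/"fieldValues" keys), or as a plain field-to-value Map.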
            if (value instanceof Struct) {
                log.trace("Struct");
                result = value;
            } else if (value instanceof Map) {
                log.trace("Map");
                Map<String, Object> map = (Map<String, Object>) value;
                if (map.containsKey("schema") && map.get("fieldValues") instanceof List) {
                    log.trace("struct stored as map.");
                    Struct struct = ObjectMapperFactory.INSTANCE.convertValue(value, Struct.class);
                    result = struct;
                } else {
                    log.trace("map");
                    Struct struct = new Struct(schema);
                    for (Map.Entry<String, Object> kvp : map.entrySet()) {
                        log.trace("field {}", kvp.getKey());
                        Field field = schema.field(kvp.getKey());
                        if (null == field) {
                            throw new DataException(String.format("Could not find field '%s' of schema '%s'", kvp.getKey(), schema.name()));
                        }
                        struct.put(field, kvp.getValue());
                    }
                    result = struct;
                }
            } else {
                log.trace("not Struct or Map.");
                result = value;
            }
            break;
        default:
            result = value;
            break;
    }
    return result;
}
Also used : Field(org.apache.kafka.connect.data.Field) DataException(org.apache.kafka.connect.errors.DataException) List(java.util.List) Map(java.util.Map) Struct(org.apache.kafka.connect.data.Struct)
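
A hedged usage sketch of the Map-to-Struct branch above; the schema and field names are hypothetical, and imports from org.apache.kafka.connect.data plus java.util are assumed:

Schema schema = SchemaBuilder.struct()
    .field("firstName", Schema.STRING_SCHEMA)
    .field("age", Schema.INT32_SCHEMA)
    .build();
Map<String, Object> input = new HashMap<>();
input.put("firstName", "Ada");
input.put("age", 36);
// Each map entry is resolved to a schema field; an unknown key would raise a DataException.
Struct struct = (Struct) ValueHelper.value(schema, input);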

Aggregations

Field (org.apache.kafka.connect.data.Field): 82 usages
Struct (org.apache.kafka.connect.data.Struct): 38
Schema (org.apache.kafka.connect.data.Schema): 33
SchemaBuilder (org.apache.kafka.connect.data.SchemaBuilder): 17
DataException (org.apache.kafka.connect.errors.DataException): 14
List (java.util.List): 12
ArrayList (java.util.ArrayList): 11
Requirements.requireStruct (org.apache.kafka.connect.transforms.util.Requirements.requireStruct): 11
HashMap (java.util.HashMap): 10
Map (java.util.Map): 8
Test (org.junit.Test): 8
Date (java.util.Date): 7
ConnectSchema (org.apache.kafka.connect.data.ConnectSchema): 6
KsqlException (io.confluent.ksql.util.KsqlException): 5
BigDecimal (java.math.BigDecimal): 5
ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode): 4
ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode): 4
SchemaKStream (io.confluent.ksql.structured.SchemaKStream): 4
ByteBuffer (java.nio.ByteBuffer): 4
JsonNode (com.fasterxml.jackson.databind.JsonNode): 3