Search in sources :

Example 1 with TupleTypeInfoBase

use of org.apache.flink.api.java.typeutils.TupleTypeInfoBase in project flink by apache.

Method `and` of the class AggregateOperator.

/**
 * Adds a further aggregation to this operator.
 *
 * @param function the aggregation to apply; must not be {@code null}
 * @param field zero-based position of the tuple field the aggregation is applied to
 * @return this operator, allowing further calls to be chained
 * @throws IllegalArgumentException if {@code field} is negative or not smaller than the
 *         arity of the operator's tuple type
 */
public AggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) getType();
    final boolean positionValid = field >= 0 && field < tupleType.getArity();
    if (!positionValid) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The factory creates the aggregation function from the type class of the selected field.
    final AggregationFunctionFactory functionFactory = function.getFactory();
    final AggregationFunction<?> aggregation =
            functionFactory.createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    // Function and field position are recorded together, one entry per registered aggregation.
    this.aggregationFunctions.add(aggregation);
    this.fields.add(field);
    return this;
}
Also used : TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) AggregationFunctionFactory(org.apache.flink.api.java.aggregation.AggregationFunctionFactory)

Example 2 with TupleTypeInfoBase

use of org.apache.flink.api.java.typeutils.TupleTypeInfoBase in project flink by apache.

Method `summarize` of the class DataSetUtils.

// --------------------------------------------------------------------------------------------
//  Summarize
// --------------------------------------------------------------------------------------------
/**
 * Summarizes a DataSet of Tuples by collecting single-pass statistics for every column.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input)
 *
 * summary.f0.getStandardDeviation()
 * summary.f1.getMaxLength()
 * }
 * </pre>
 *
 * @param input the DataSet to summarize; its type must be a tuple type
 * @return the summary as a Tuple the same width as input rows
 * @throws IllegalArgumentException if the type of {@code input} is not a tuple type
 * @throws Exception if building or collecting the aggregated result fails
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    final boolean tupleInput = input.getType().isTupleType();
    if (!tupleInput) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSet's of Tuples");
    }
    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) input.getType();

    // Step 1: fold every row of a partition into one aggregator and emit that aggregator.
    final MapPartitionFunction<T, TupleSummaryAggregator<R>> summarizePartition =
            new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {

                @Override
                public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
                    TupleSummaryAggregator<R> partitionSummary = SummaryAggregatorFactory.create(tupleType);
                    for (Tuple row : values) {
                        partitionSummary.aggregate(row);
                    }
                    out.collect(partitionSummary);
                }
            };

    // Step 2: merge the emitted aggregators pairwise; the left one absorbs the right one.
    final ReduceFunction<TupleSummaryAggregator<R>> mergeSummaries =
            new ReduceFunction<TupleSummaryAggregator<R>>() {

                @Override
                public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> agg1, TupleSummaryAggregator<R> agg2) throws Exception {
                    agg1.combine(agg2);
                    return agg1;
                }
            };

    // Step 3: collect the merged aggregator and read its result.
    return input
            .mapPartition(summarizePartition)
            .reduce(mergeSummaries)
            .collect()
            .get(0)
            .result();
}
Also used : RichMapPartitionFunction(org.apache.flink.api.common.functions.RichMapPartitionFunction) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) Collector(org.apache.flink.util.Collector) Tuple(org.apache.flink.api.java.tuple.Tuple) TupleSummaryAggregator(org.apache.flink.api.java.summarize.aggregation.TupleSummaryAggregator)

Example 3 with TupleTypeInfoBase

use of org.apache.flink.api.java.typeutils.TupleTypeInfoBase in project flink by apache.

Method `and` of the class ScalaAggregateOperator.

/**
 * Adds a further aggregation to this operator.
 *
 * @param function the aggregation to apply; must not be {@code null}
 * @param field zero-based position of the tuple field the aggregation is applied to
 * @return this operator, allowing further calls to be chained
 * @throws IllegalArgumentException if {@code field} is negative or not smaller than the
 *         arity of the operator's tuple type
 */
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) getType();
    final boolean positionValid = field >= 0 && field < tupleType.getArity();
    if (!positionValid) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // The factory creates the aggregation function from the type class of the selected field.
    final AggregationFunctionFactory functionFactory = function.getFactory();
    final AggregationFunction<?> aggregation =
            functionFactory.createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    // Function and field position are recorded together, one entry per registered aggregation.
    this.aggregationFunctions.add(aggregation);
    this.fields.add(field);
    return this;
}
Also used : TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) AggregationFunctionFactory(org.apache.flink.api.java.aggregation.AggregationFunctionFactory)

Example 4 with TupleTypeInfoBase

use of org.apache.flink.api.java.typeutils.TupleTypeInfoBase in project flink by apache.

Method `getAccessor` (field-expression variant) of the class FieldAccessorFactory.

/**
 * Creates a {@link FieldAccessor} for the field that is given by a field expression,
 * which can be used to get and set the specified field on instances of this type.
 *
 * <p>How the expression is interpreted depends on the kind of {@code typeInfo}:
 * <ul>
 *   <li>arrays: the expression must be an integer index;</li>
 *   <li>basic types: the expression must be the select-all character or an integer
 *       (which is then validated by the position-based overload);</li>
 *   <li>POJOs and case classes: the head of the expression is looked up as a field name;</li>
 *   <li>tuples: the head is looked up as a field name and, failing that, parsed as an
 *       integer position.</li>
 * </ul>
 * A remaining tail of the expression is resolved recursively on the selected field's type.
 *
 * @param typeInfo The type on which the field expression is evaluated
 * @param field The field expression
 * @param config Configuration object
 * @param <T> The type that contains the field
 * @param <F> The type of the field to access
 * @return The created FieldAccessor
 * @throws CompositeType.InvalidFieldReferenceException if the expression cannot be resolved
 *         on the given type, or the type does not support field expressions
 */
@Internal
public static <T, F> FieldAccessor<T, F> getAccessor(TypeInformation<T> typeInfo, String field, ExecutionConfig config) {
    if (typeInfo instanceof BasicArrayTypeInfo || typeInfo instanceof PrimitiveArrayTypeInfo) {
        // In case of arrays
        try {
            return new FieldAccessor.ArrayFieldAccessor<>(Integer.parseInt(field), typeInfo);
        } catch (NumberFormatException ex) {
            throw new CompositeType.InvalidFieldReferenceException("A field expression on an array must be an integer index (that might be given as a string).");
        }
    } else if (typeInfo instanceof BasicTypeInfo) {
        // In case of basic types: the select-all character is treated as position 0
        try {
            int pos = field.equals(Keys.ExpressionKeys.SELECT_ALL_CHAR) ? 0 : Integer.parseInt(field);
            return FieldAccessorFactory.getAccessor(typeInfo, pos, config);
        } catch (NumberFormatException ex) {
            throw new CompositeType.InvalidFieldReferenceException("You tried to select the field \"" + field + "\" on a " + typeInfo.toString() + ". A field expression on a basic type can only be \"*\" or \"0\"" + " (both of which mean selecting the entire basic type).");
        }
    } else if (typeInfo instanceof PojoTypeInfo) {
        // In case of Pojos
        FieldExpression decomp = decomposeFieldExpression(field);
        PojoTypeInfo<?> pojoTypeInfo = (PojoTypeInfo) typeInfo;
        int fieldIndex = pojoTypeInfo.getFieldIndex(decomp.head);
        if (fieldIndex == -1) {
            throw new CompositeType.InvalidFieldReferenceException("Unable to find field \"" + decomp.head + "\" in type " + typeInfo + ".");
        } else {
            PojoField pojoField = pojoTypeInfo.getPojoFieldAt(fieldIndex);
            TypeInformation<?> fieldType = pojoTypeInfo.getTypeAt(fieldIndex);
            if (decomp.tail == null) {
                // No nested expression: access the POJO field itself.
                @SuppressWarnings("unchecked") FieldAccessor<F, F> innerAccessor = new FieldAccessor.SimpleFieldAccessor<>((TypeInformation<F>) fieldType);
                return new FieldAccessor.PojoFieldAccessor<>(pojoField.getField(), innerAccessor);
            } else {
                // Nested expression: resolve the tail on the field's type.
                @SuppressWarnings("unchecked") FieldAccessor<Object, F> innerAccessor = FieldAccessorFactory.getAccessor((TypeInformation<Object>) fieldType, decomp.tail, config);
                return new FieldAccessor.PojoFieldAccessor<>(pojoField.getField(), innerAccessor);
            }
        }
    } else if (typeInfo.isTupleType() && ((TupleTypeInfoBase) typeInfo).isCaseClass()) {
        // In case of case classes
        TupleTypeInfoBase tupleTypeInfo = (TupleTypeInfoBase) typeInfo;
        FieldExpression decomp = decomposeFieldExpression(field);
        int fieldPos = tupleTypeInfo.getFieldIndex(decomp.head);
        if (fieldPos < 0) {
            throw new CompositeType.InvalidFieldReferenceException("Invalid field selected: " + field);
        }
        if (decomp.tail == null) {
            return new FieldAccessor.SimpleProductFieldAccessor<>(fieldPos, typeInfo, config);
        } else {
            @SuppressWarnings("unchecked") FieldAccessor<Object, F> innerAccessor = getAccessor(tupleTypeInfo.getTypeAt(fieldPos), decomp.tail, config);
            return new FieldAccessor.RecursiveProductFieldAccessor<>(fieldPos, typeInfo, innerAccessor, config);
        }
    } else if (typeInfo.isTupleType()) {
        // In case of tuples
        TupleTypeInfo tupleTypeInfo = (TupleTypeInfo) typeInfo;
        FieldExpression decomp = decomposeFieldExpression(field);
        int fieldPos = tupleTypeInfo.getFieldIndex(decomp.head);
        if (fieldPos == -1) {
            // The head is not a known field name; fall back to reading it as a plain integer position.
            try {
                fieldPos = Integer.parseInt(decomp.head);
            } catch (NumberFormatException ex) {
                throw new CompositeType.InvalidFieldReferenceException("Tried to select field \"" + decomp.head + "\" on " + typeInfo.toString() + ". Only integer values are allowed here.");
            }
        }
        if (decomp.tail == null) {
            @SuppressWarnings("unchecked") FieldAccessor<T, F> result = new FieldAccessor.SimpleTupleFieldAccessor(fieldPos, tupleTypeInfo);
            return result;
        } else {
            @SuppressWarnings("unchecked") FieldAccessor<?, F> innerAccessor = getAccessor(tupleTypeInfo.getTypeAt(fieldPos), decomp.tail, config);
            @SuppressWarnings("unchecked") FieldAccessor<T, F> result = new FieldAccessor.RecursiveTupleFieldAccessor(fieldPos, innerAccessor, tupleTypeInfo);
            return result;
        }
    } else {
        // Default statement: every other kind of type is rejected
        throw new CompositeType.InvalidFieldReferenceException("Cannot reference field by field expression on " + typeInfo.toString() + ". Field expressions are only supported on POJO types, tuples, and case classes. " + "(See the Flink documentation on what is considered a POJO.)");
    }
}
Also used : PojoTypeInfo(org.apache.flink.api.java.typeutils.PojoTypeInfo) TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) PojoField(org.apache.flink.api.java.typeutils.PojoField) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) CompositeType(org.apache.flink.api.common.typeutils.CompositeType) Internal(org.apache.flink.annotation.Internal)

Example 5 with TupleTypeInfoBase

use of org.apache.flink.api.java.typeutils.TupleTypeInfoBase in project flink by apache.

Method `getAccessor` (field-position variant) of the class FieldAccessorFactory.

/**
 * Creates a {@link FieldAccessor} for the given field position, which can be used to get and set
 * the specified field on instances of this type.
 *
 * <p>Positions are accepted on arrays, basic types (position 0 only), case classes and tuples.
 * Every other type, including POJO types, is rejected.
 *
 * @param typeInfo The type on which the field position is evaluated
 * @param pos The field position (zero-based)
 * @param config Configuration object
 * @param <T> The type that contains the field
 * @param <F> The type of the field to access
 * @return The created FieldAccessor
 * @throws CompositeType.InvalidFieldReferenceException if a position other than 0 is selected
 *         on a basic type, or the type does not support access by position
 */
@Internal
public static <T, F> FieldAccessor<T, F> getAccessor(TypeInformation<T> typeInfo, int pos, ExecutionConfig config) {
    if (typeInfo instanceof BasicArrayTypeInfo || typeInfo instanceof PrimitiveArrayTypeInfo) {
        // In case of arrays
        return new FieldAccessor.ArrayFieldAccessor<>(pos, typeInfo);
    } else if (typeInfo instanceof BasicTypeInfo) {
        // In case of basic types: only position 0 (the whole value) can be selected
        if (pos != 0) {
            throw new CompositeType.InvalidFieldReferenceException("The " + pos + ". field selected on a " + "basic type (" + typeInfo.toString() + "). A field expression on a basic type can only select " + "the 0th field (which means selecting the entire basic type).");
        }
        @SuppressWarnings("unchecked") FieldAccessor<T, F> result = (FieldAccessor<T, F>) new FieldAccessor.SimpleFieldAccessor<>(typeInfo);
        return result;
    } else if (typeInfo.isTupleType() && ((TupleTypeInfoBase) typeInfo).isCaseClass()) {
        // In case of case classes
        TupleTypeInfoBase tupleTypeInfo = (TupleTypeInfoBase) typeInfo;
        @SuppressWarnings("unchecked") TypeInformation<F> fieldTypeInfo = (TypeInformation<F>) tupleTypeInfo.getTypeAt(pos);
        return new FieldAccessor.RecursiveProductFieldAccessor<>(pos, typeInfo, new FieldAccessor.SimpleFieldAccessor<>(fieldTypeInfo), config);
    } else if (typeInfo.isTupleType()) {
        // In case of tuples
        @SuppressWarnings("unchecked") FieldAccessor<T, F> result = new FieldAccessor.SimpleTupleFieldAccessor(pos, typeInfo);
        return result;
    } else {
        // Default case, PojoType is directed to this statement
        throw new CompositeType.InvalidFieldReferenceException("Cannot reference field by position on " + typeInfo.toString() + ". Referencing a field by position is supported on tuples, case classes, and arrays. " + "Additionally, you can select the 0th field of a primitive/basic type (e.g. int).");
    }
}
Also used : TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) CompositeType(org.apache.flink.api.common.typeutils.CompositeType) Internal(org.apache.flink.annotation.Internal)

Aggregations

TupleTypeInfoBase (org.apache.flink.api.java.typeutils.TupleTypeInfoBase)5 Internal (org.apache.flink.annotation.Internal)2 BasicArrayTypeInfo (org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo)2 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)2 PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo)2 CompositeType (org.apache.flink.api.common.typeutils.CompositeType)2 AggregationFunctionFactory (org.apache.flink.api.java.aggregation.AggregationFunctionFactory)2 MapPartitionFunction (org.apache.flink.api.common.functions.MapPartitionFunction)1 RichMapPartitionFunction (org.apache.flink.api.common.functions.RichMapPartitionFunction)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1 TupleSummaryAggregator (org.apache.flink.api.java.summarize.aggregation.TupleSummaryAggregator)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 PojoField (org.apache.flink.api.java.typeutils.PojoField)1 PojoTypeInfo (org.apache.flink.api.java.typeutils.PojoTypeInfo)1 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)1 Collector (org.apache.flink.util.Collector)1