Search in sources :

Example 1 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class CoderUtils method getCodedType.

/**
 * Extracts the coded type from a descriptor of a coder class.
 *
 * <p>The descriptor is viewed as its {@code Coder} supertype, and the first actual type
 * argument of that supertype is wrapped in a new {@code TypeDescriptor}.
 *
 * @param coderDescriptor descriptor of the coder class to inspect
 * @return a descriptor of the first type argument of the coder's {@code Coder} supertype
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static TypeDescriptor getCodedType(TypeDescriptor coderDescriptor) {
    // The cast assumes the Coder supertype is reported as a parameterized type.
    TypeDescriptor coderSupertype = coderDescriptor.getSupertype(Coder.class);
    ParameterizedType parameterizedCoder = (ParameterizedType) coderSupertype.getType();
    return TypeDescriptor.of(parameterizedCoder.getActualTypeArguments()[0]);
}
Also used : ParameterizedType(java.lang.reflect.ParameterizedType) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor)

Example 2 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class CoderRegistry method verifyCompatible.

/**
 * Verifies that the given {@link Coder} can possibly encode elements of the given type.
 *
 * <p>Returns normally when no incompatibility is detected. A candidate that is a bare type
 * variable is accepted without further checks. Otherwise the raw coded class must be
 * assignable from the raw candidate class, and, when the coded type is parameterized and the
 * coder reports component coders, each component coder is verified recursively against the
 * matching type argument.
 *
 * @param coder the coder to check
 * @param candidateType the element type the coder would be asked to encode
 * @throws IncompatibleCoderException if the raw types are not assignable, if the coder has
 *     more component coders than the generic supertype has type arguments, or if a component
 *     coder is itself incompatible
 */
@VisibleForTesting
static <T, CoderT extends Coder<T>, CandidateT> void verifyCompatible(CoderT coder, Type candidateType) throws IncompatibleCoderException {
    // The coder's own class, raw and as a descriptor.
    @SuppressWarnings("unchecked") Class<CoderT> rawCoderClass = (Class<CoderT>) coder.getClass();
    TypeDescriptor<CoderT> coderTypeDescriptor = TypeDescriptor.of(rawCoderClass);

    // The type the coder encodes: descriptor, raw class and reflective type.
    @SuppressWarnings("unchecked") TypeDescriptor<T> encodedDescriptor = CoderUtils.getCodedType(coderTypeDescriptor);
    @SuppressWarnings("unchecked") Class<T> encodedRawClass = (Class<T>) encodedDescriptor.getRawType();
    Type encodedType = encodedDescriptor.getType();

    // The candidate type: descriptor and raw class.
    @SuppressWarnings("unchecked") TypeDescriptor<CandidateT> candidateDescriptor = (TypeDescriptor<CandidateT>) TypeDescriptor.of(candidateType);
    @SuppressWarnings("unchecked") Class<CandidateT> candidateRawClass = (Class<CandidateT>) candidateDescriptor.getRawType();

    // A bare type variable carries no concrete type to check against, so it cannot be ruled out.
    if (candidateType instanceof TypeVariable) {
        return;
    }

    // Incompatible raw classes rule the coder out immediately.
    if (!encodedRawClass.isAssignableFrom(candidateRawClass)) {
        String rawMismatch = String.format("Cannot encode elements of type %s with coder %s because the" + " coded type %s is not assignable from %s", candidateType, coder, encodedRawClass, candidateType);
        throw new IncompatibleCoderException(rawMismatch, coder, candidateType);
    }

    // The raw check passed, so the candidate may be viewed as a subtype of the coded type.
    // Only this one direction is checked.
    @SuppressWarnings("unchecked") TypeDescriptor<T> upcastCandidate = (TypeDescriptor<T>) candidateDescriptor;

    // Nothing further to verify unless the coded type is parameterized and the coder
    // exposes component coders.
    if (!(encodedType instanceof ParameterizedType) || isNullOrEmpty(coder.getCoderArguments())) {
        return;
    }

    ParameterizedType genericSupertype = (ParameterizedType) upcastCandidate.getSupertype(encodedRawClass).getType();
    Type[] actualArguments = genericSupertype.getActualTypeArguments();
    List<? extends Coder<?>> componentCoders = coder.getCoderArguments();

    if (componentCoders.size() > actualArguments.length) {
        String arityMismatch = String.format("Cannot encode elements of type %s with coder %s:" + " the generic supertype %s has %s type parameters, which is less than the" + " number of coder arguments %s has (%s).", upcastCandidate, coder, genericSupertype, actualArguments.length, coder, componentCoders.size());
        throw new IncompatibleCoderException(arityMismatch, coder, upcastCandidate.getType());
    }

    // Check each component coder against the correspondingly positioned type argument,
    // resolved in the context of the candidate type.
    for (int argIndex = 0; argIndex < componentCoders.size(); ++argIndex) {
        try {
            Coder<?> componentCoder = componentCoders.get(argIndex);
            Type resolvedArgument = candidateDescriptor.resolveType(actualArguments[argIndex]).getType();
            verifyCompatible(componentCoder, resolvedArgument);
        } catch (IncompatibleCoderException cause) {
            // Re-throw with the outer coder and type, keeping the component failure as the cause.
            String componentMismatch = String.format("Cannot encode elements of type %s with coder %s" + " because some component coder is incompatible", candidateType, coder);
            throw new IncompatibleCoderException(componentMismatch, coder, candidateType, cause);
        }
    }
}
Also used : ParameterizedType(java.lang.reflect.ParameterizedType) WildcardType(java.lang.reflect.WildcardType) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) TypeVariable(java.lang.reflect.TypeVariable) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 3 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class HadoopFormatIOSequenceFileTest method streamTest.

/**
 * Feeds timestamped words through a {@code TestStream}, counts them per fixed window, writes
 * the counts with {@code HadoopFormatIO}, and then checks that the written data equals the
 * word counts of {@code FIRST_WIN_WORDS} and that the locks directory contains no files.
 */
@Test
public void streamTest() {
    // Watermark advances are interleaved with the elements added to the stream.
    TestStream<String> wordEvents =
        TestStream.create(StringUtf8Coder.of())
            .advanceWatermarkTo(START_TIME)
            .addElements(event(FIRST_WIN_WORDS.get(0), 2L))
            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(27L)))
            .addElements(
                event(FIRST_WIN_WORDS.get(1), 25L),
                event(FIRST_WIN_WORDS.get(2), 18L),
                event(FIRST_WIN_WORDS.get(3), 28L))
            .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(65L)))
            .addElements(
                event(SECOND_WIN_WORDS.get(0), 61L),
                event(SECOND_WIN_WORDS.get(1), 63L))
            .advanceWatermarkToInfinity();

    String targetDir = getOutputDirPath("streamTest");

    // Window, lower-case, count, then convert the counts to Hadoop writable pairs.
    PCollection<KV<Text, LongWritable>> hadoopRecords =
        pipeline
            .apply(wordEvents)
            .apply(Window.into(FixedWindows.of(WINDOW_DURATION)))
            .apply(ParDo.of(new ConvertToLowerCaseFn()))
            .apply(new WordCount.CountWords())
            .apply(
                "ConvertToHadoopFormat",
                ParDo.of(new ConvertToHadoopFormatFn<>(KV_STR_INT_2_TXT_LONGWRITABLE)))
            .setTypeDescriptor(
                TypeDescriptors.kvs(
                    new TypeDescriptor<Text>() {},
                    new TypeDescriptor<LongWritable>() {}));

    ConfigTransform<Text, LongWritable> hadoopConfig =
        new ConfigTransform<>(targetDir, Text.class, LongWritable.class);

    hadoopRecords.apply(
        HadoopFormatIO.<Text, LongWritable>write()
            .withConfigurationTransform(hadoopConfig)
            .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));

    pipeline.run();

    // The written output is compared against the counts of the first window's words.
    Map<String, Long> writtenCounts = loadWrittenDataAsMap(targetDir);
    MatcherAssert.assertThat(
        writtenCounts.entrySet(), equalTo(computeWordCounts(FIRST_WIN_WORDS).entrySet()));

    String[] remainingLockFiles = new File(getLocksDirPath()).list();
    Assert.assertEquals("In lock folder shouldn't be any file", 0, remainingLockFiles.length);
}
Also used : Text(org.apache.hadoop.io.Text) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) WordCount(org.apache.beam.examples.WordCount) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) Test(org.junit.Test)

Example 4 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class Create method getDefaultCreateCoder.

/**
 * Chooses a default coder for the elements of a {@code Create} transform.
 *
 * <p>All non-null elements must share one runtime class. When that class declares no type
 * parameters, a schema-based coder is tried first and then a coder from the registry. If
 * neither succeeds, or the class is generic, the coder is inferred from the element objects.
 *
 * @param coderRegistry registry consulted for a coder of the element class
 * @param schemaRegistry registry consulted for a schema of the element class
 * @param elems the elements to be created; must not be empty
 * @return the coder selected for the elements
 * @throws CannotProvideCoderException if the non-null elements are not all of the same class
 */
private static <T> Coder<T> getDefaultCreateCoder(CoderRegistry coderRegistry, SchemaRegistry schemaRegistry, Iterable<T> elems) throws CannotProvideCoderException {
    checkArgument(!Iterables.isEmpty(elems), "Can not determine a default Coder for a 'Create' PTransform that " + "has no elements.  Either add elements, call Create.empty(Coder)," + " Create.empty(TypeDescriptor), or call 'withCoder(Coder)' or " + "'withType(TypeDescriptor)' on the PTransform.");

    // Step 1: find the single runtime class shared by the non-null elements.
    // Void.class acts as the "nothing seen yet" marker.
    Class<?> sharedClass = Void.class;
    for (T element : elems) {
        if (element != null) {
            Class<?> currentClass = element.getClass();
            if (!sharedClass.equals(Void.class) && !sharedClass.equals(currentClass)) {
                // Mixed element classes: the caller has to supply a coder explicitly.
                throw new CannotProvideCoderException(String.format("Cannot provide coder for %s: The elements are not all of the same class.", Create.class.getSimpleName()));
            }
            sharedClass = currentClass;
        }
    }

    TypeDescriptor<T> elementDescriptor = (TypeDescriptor<T>) TypeDescriptor.of(sharedClass);

    // Step 2: the class-based lookups are attempted only for classes without type parameters.
    boolean hasNoTypeParameters = sharedClass.getTypeParameters().length == 0;
    if (hasNoTypeParameters) {
        try {
            return SchemaCoder.of(schemaRegistry.getSchema(elementDescriptor), elementDescriptor, schemaRegistry.getToRowFunction(elementDescriptor), schemaRegistry.getFromRowFunction(elementDescriptor));
        } catch (NoSuchSchemaException ignored) {
            // No schema registered for this class; fall through to the coder registry.
        }
        try {
            // The registry result is narrowed to Coder<T> for the descriptor built above.
            @SuppressWarnings("unchecked") Coder<T> registryCoder = (Coder<T>) coderRegistry.getCoder(elementDescriptor);
            return registryCoder;
        } catch (CannotProvideCoderException ignored) {
            // The class alone was not enough; fall through to inspecting the elements.
        }
    }

    // Step 3: last resort, infer the coder from the element objects themselves.
    return (Coder<T>) inferCoderFromObjects(coderRegistry, schemaRegistry, elems);
}
Also used : TimestampedValueCoder(org.apache.beam.sdk.values.TimestampedValue.TimestampedValueCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) SetCoder(org.apache.beam.sdk.coders.SetCoder) MapCoder(org.apache.beam.sdk.coders.MapCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) DequeCoder(org.apache.beam.sdk.coders.DequeCoder) Coder(org.apache.beam.sdk.coders.Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) CollectionCoder(org.apache.beam.sdk.coders.CollectionCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) CannotProvideCoderException(org.apache.beam.sdk.coders.CannotProvideCoderException) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException)

Example 5 with TypeDescriptor

use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.

the class DoFnSignatures method analyzeProcessElementMethod.

/**
 * Analyzes a {@code @ProcessElement} method and builds its signature description.
 *
 * <p>The method must return {@code void} or {@code DoFn.ProcessContinuation}. Each declared
 * parameter is analyzed as an extra parameter, schema element parameters are numbered in
 * declaration order, and every parameter is then checked against the set allowed for
 * splittable or non-splittable methods, depending on whether a restriction tracker parameter
 * is present.
 *
 * @param errors reporter used for validation failures
 * @param fnClass descriptor of the {@code DoFn} class declaring the method
 * @param m the method being analyzed
 * @param inputT descriptor of the input element type
 * @param outputT descriptor of the output element type
 * @param fnContext analysis context passed on to the per-parameter analysis
 * @return the description of the analyzed method
 */
@VisibleForTesting
static DoFnSignature.ProcessElementMethod analyzeProcessElementMethod(ErrorReporter errors, TypeDescriptor<? extends DoFn<?, ?>> fnClass, Method m, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT, FnAnalysisContext fnContext) {
    Class<?> declaredReturn = m.getReturnType();
    boolean returnsContinuation = DoFn.ProcessContinuation.class.equals(declaredReturn);
    errors.checkArgument(void.class.equals(declaredReturn) || returnsContinuation, "Must return void or %s", format(DoFn.ProcessContinuation.class));

    MethodAnalysisContext methodContext = MethodAnalysisContext.create();
    boolean requiresStableInput = m.isAnnotationPresent(DoFn.RequiresStableInput.class);
    boolean requiresTimeSortedInput = m.isAnnotationPresent(DoFn.RequiresTimeSortedInput.class);
    TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnClass, m);

    // Analyze every declared parameter, resolving its generic type against the DoFn class.
    Type[] genericParamTypes = m.getGenericParameterTypes();
    for (int paramIndex = 0; paramIndex < genericParamTypes.length; ++paramIndex) {
        methodContext.addParameter(
            analyzeExtraParameter(
                errors.forMethod(DoFn.ProcessElement.class, m),
                fnContext,
                methodContext,
                ParameterDescription.of(
                    m,
                    paramIndex,
                    fnClass.resolveType(genericParamTypes[paramIndex]),
                    Arrays.asList(m.getParameterAnnotations()[paramIndex])),
                inputT,
                outputT));
    }

    // Number the schema element parameters 0, 1, 2, ... in the order they appear.
    int nextSchemaIndex = 0;
    for (int pos = 0; pos < methodContext.getExtraParameters().size(); ++pos) {
        Parameter candidate = methodContext.getExtraParameters().get(pos);
        if (!(candidate instanceof SchemaElementParameter)) {
            continue;
        }
        SchemaElementParameter indexed = ((SchemaElementParameter) candidate).toBuilder().setIndex(nextSchemaIndex++).build();
        methodContext.setParameter(pos, indexed);
    }

    TypeDescriptor<?> trackerT = methodContext.findParameter(RestrictionTrackerParameter.class).map(tracker -> tracker.trackerT()).orElse(null);
    TypeDescriptor<?> watermarkEstimatorT = methodContext.findParameter(WatermarkEstimatorParameter.class).map(estimator -> estimator.estimatorT()).orElse(null);

    // A restriction tracker parameter marks the method as splittable, which selects the
    // set of parameter kinds that are allowed.
    boolean splittable = trackerT != null;
    for (Parameter declared : methodContext.getExtraParameters()) {
        if (splittable) {
            checkParameterOneOf(errors, declared, ALLOWED_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS);
        } else {
            checkParameterOneOf(errors, declared, ALLOWED_NON_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS);
        }
    }

    return DoFnSignature.ProcessElementMethod.create(m, methodContext.getExtraParameters(), requiresStableInput, requiresTimeSortedInput, trackerT, watermarkEstimatorT, windowT, returnsContinuation);
}
Also used : MethodWithExtraParameters(org.apache.beam.sdk.transforms.reflect.DoFnSignature.MethodWithExtraParameters) StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) ValueState(org.apache.beam.sdk.state.ValueState) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) TruncateResult(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.TruncateResult) WatermarkEstimatorParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter) HasDefaultWatermarkEstimator(org.apache.beam.sdk.transforms.splittabledofn.HasDefaultWatermarkEstimator) Map(java.util.Map) OutputReceiver(org.apache.beam.sdk.transforms.DoFn.OutputReceiver) Predicates(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Predicates) BundleFinalizerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter) Method(java.lang.reflect.Method) Internal(org.apache.beam.sdk.annotations.Internal) MultiOutputReceiver(org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver) GetInitialRestrictionMethod(org.apache.beam.sdk.transforms.reflect.DoFnSignature.GetInitialRestrictionMethod) TruncateRestriction(org.apache.beam.sdk.transforms.DoFn.TruncateRestriction) OrderedListState(org.apache.beam.sdk.state.OrderedListState) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) SideInput(org.apache.beam.sdk.transforms.DoFn.SideInput) Collectors(java.util.stream.Collectors) List(java.util.List) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) PipelineOptionsParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter) Type(java.lang.reflect.Type) Timer(org.apache.beam.sdk.state.Timer) 
VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) Modifier(java.lang.reflect.Modifier) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) AutoValue(com.google.auto.value.AutoValue) Annotation(java.lang.annotation.Annotation) Optional(java.util.Optional) HasDefaultTracker(org.apache.beam.sdk.transforms.splittabledofn.HasDefaultTracker) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) FieldAccessDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.FieldAccessDeclaration) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) TimerMap(org.apache.beam.sdk.state.TimerMap) SetState(org.apache.beam.sdk.state.SetState) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) TimerFamilyParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter) Coder(org.apache.beam.sdk.coders.Coder) WatermarkEstimator(org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeParameter(org.apache.beam.sdk.values.TypeParameter) LinkedHashMap(java.util.LinkedHashMap) MapState(org.apache.beam.sdk.state.MapState) TimerSpec(org.apache.beam.sdk.state.TimerSpec) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) TimerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter) WatermarkEstimatorStateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) 
FormatMethod(com.google.errorprone.annotations.FormatMethod) FormatString(com.google.errorprone.annotations.FormatString) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) DoFn(org.apache.beam.sdk.transforms.DoFn) GetInitialWatermarkEstimatorStateMethod(org.apache.beam.sdk.transforms.reflect.DoFnSignature.GetInitialWatermarkEstimatorStateMethod) State(org.apache.beam.sdk.state.State) TimerFamilyDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerFamilyDeclaration) Field(java.lang.reflect.Field) ManualWatermarkEstimator(org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator) PCollection(org.apache.beam.sdk.values.PCollection) StateId(org.apache.beam.sdk.transforms.DoFn.StateId) RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) BagState(org.apache.beam.sdk.state.BagState) StateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) ParameterizedType(java.lang.reflect.ParameterizedType) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) TimerId(org.apache.beam.sdk.transforms.DoFn.TimerId) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) ReadableState(org.apache.beam.sdk.state.ReadableState) Type(java.lang.reflect.Type) ParameterizedType(java.lang.reflect.ParameterizedType) DoFn(org.apache.beam.sdk.transforms.DoFn) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) 
WatermarkEstimatorParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter) BundleFinalizerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter) PipelineOptionsParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) TimerFamilyParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter) TypeParameter(org.apache.beam.sdk.values.TypeParameter) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) TimerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter) WatermarkEstimatorStateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) StateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

TypeDescriptor (org.apache.beam.sdk.values.TypeDescriptor)20 ParameterizedType (java.lang.reflect.ParameterizedType)8 Type (java.lang.reflect.Type)6 DoFn (org.apache.beam.sdk.transforms.DoFn)6 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)5 LinkedHashMap (java.util.LinkedHashMap)4 Timer (org.apache.beam.sdk.state.Timer)4 KV (org.apache.beam.sdk.values.KV)4 Test (org.junit.Test)4 FormatString (com.google.errorprone.annotations.FormatString)3 List (java.util.List)3 Map (java.util.Map)3 BagState (org.apache.beam.sdk.state.BagState)3 MapState (org.apache.beam.sdk.state.MapState)3 OrderedListState (org.apache.beam.sdk.state.OrderedListState)3 ReadableState (org.apache.beam.sdk.state.ReadableState)3 SetState (org.apache.beam.sdk.state.SetState)3 State (org.apache.beam.sdk.state.State)3 ValueState (org.apache.beam.sdk.state.ValueState)3 WatermarkHoldState (org.apache.beam.sdk.state.WatermarkHoldState)3