Use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.
The class CoderUtils, method getCodedType.
/**
 * If {@code coderDescriptor} describes a subclass of {@code Coder<T>} for a specific
 * type {@code T}, returns {@code T.class}.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public static TypeDescriptor getCodedType(TypeDescriptor coderDescriptor) {
  ParameterizedType coderType =
      (ParameterizedType) coderDescriptor.getSupertype(Coder.class).getType();
  TypeDescriptor codedType = TypeDescriptor.of(coderType.getActualTypeArguments()[0]);
  return codedType;
}
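As a usage illustration, a minimal sketch (not from the Beam sources; it assumes getCodedType is accessible and StringUtf8Coder is on the classpath):

// StringUtf8Coder extends Coder<String>, so the coded type resolves to String.
TypeDescriptor<?> codedType =
    CoderUtils.getCodedType(TypeDescriptor.of(StringUtf8Coder.class));
// codedType.getRawType() == String.class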
Use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.
The class CoderRegistry, method verifyCompatible.
/**
 * Verifies that the given {@link Coder} can possibly encode elements of the given type,
 * throwing an {@link IncompatibleCoderException} if it cannot.
 */
@VisibleForTesting
static <T, CoderT extends Coder<T>, CandidateT> void verifyCompatible(
    CoderT coder, Type candidateType) throws IncompatibleCoderException {
  // Various representations of the coder's class
  @SuppressWarnings("unchecked")
  Class<CoderT> coderClass = (Class<CoderT>) coder.getClass();
  TypeDescriptor<CoderT> coderDescriptor = TypeDescriptor.of(coderClass);

  // Various representations of the actual coded type
  @SuppressWarnings("unchecked")
  TypeDescriptor<T> codedDescriptor = CoderUtils.getCodedType(coderDescriptor);
  @SuppressWarnings("unchecked")
  Class<T> codedClass = (Class<T>) codedDescriptor.getRawType();
  Type codedType = codedDescriptor.getType();

  // Various representations of the candidate type
  @SuppressWarnings("unchecked")
  TypeDescriptor<CandidateT> candidateDescriptor =
      (TypeDescriptor<CandidateT>) TypeDescriptor.of(candidateType);
  @SuppressWarnings("unchecked")
  Class<CandidateT> candidateClass = (Class<CandidateT>) candidateDescriptor.getRawType();

  // If the coder has type Coder<T> where the actual value of T is lost
  // to erasure, then we cannot rule it out.
  if (candidateType instanceof TypeVariable) {
    return;
  }

  // If the raw types are not compatible, we can certainly rule out
  // coder compatibility.
  if (!codedClass.isAssignableFrom(candidateClass)) {
    throw new IncompatibleCoderException(
        String.format(
            "Cannot encode elements of type %s with coder %s because the"
                + " coded type %s is not assignable from %s",
            candidateType, coder, codedClass, candidateType),
        coder, candidateType);
  }
  // We have established that this is a covariant upcast... though
  // coders are invariant, we are just checking one direction.
  @SuppressWarnings("unchecked")
  TypeDescriptor<T> candidateOkDescriptor = (TypeDescriptor<T>) candidateDescriptor;

  // If the coded type is a parameterized type where any of the actual
  // type parameters are not compatible, then the whole thing is certainly not
  // compatible.
  if ((codedType instanceof ParameterizedType) && !isNullOrEmpty(coder.getCoderArguments())) {
    ParameterizedType parameterizedSupertype =
        (ParameterizedType) candidateOkDescriptor.getSupertype(codedClass).getType();
    Type[] typeArguments = parameterizedSupertype.getActualTypeArguments();
    List<? extends Coder<?>> typeArgumentCoders = coder.getCoderArguments();
    if (typeArguments.length < typeArgumentCoders.size()) {
      throw new IncompatibleCoderException(
          String.format(
              "Cannot encode elements of type %s with coder %s:"
                  + " the generic supertype %s has %s type parameters, which is less than the"
                  + " number of coder arguments %s has (%s).",
              candidateOkDescriptor, coder, parameterizedSupertype,
              typeArguments.length, coder, typeArgumentCoders.size()),
          coder, candidateOkDescriptor.getType());
    }
    for (int i = 0; i < typeArgumentCoders.size(); i++) {
      try {
        Coder<?> typeArgumentCoder = typeArgumentCoders.get(i);
        verifyCompatible(
            typeArgumentCoder, candidateDescriptor.resolveType(typeArguments[i]).getType());
      } catch (IncompatibleCoderException exn) {
        throw new IncompatibleCoderException(
            String.format(
                "Cannot encode elements of type %s with coder %s"
                    + " because some component coder is incompatible",
                candidateType, coder),
            coder, candidateType, exn);
      }
    }
  }
}
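To make the check concrete, a minimal sketch of one compatible and one incompatible invocation (hypothetical test code; the method is package-private and @VisibleForTesting, so this assumes same-package access):

// Compatible: ListCoder.of(StringUtf8Coder.of()) can encode List<String>; returns normally.
CoderRegistry.verifyCompatible(
    ListCoder.of(StringUtf8Coder.of()), new TypeDescriptor<List<String>>() {}.getType());

// Incompatible: the component coder encodes Long, not String, so the recursive
// component check throws IncompatibleCoderException.
CoderRegistry.verifyCompatible(
    ListCoder.of(VarLongCoder.of()), new TypeDescriptor<List<String>>() {}.getType());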
Use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.
The class HadoopFormatIOSequenceFileTest, method streamTest.
@Test
public void streamTest() {
  TestStream<String> stringsStream =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(START_TIME)
          .addElements(event(FIRST_WIN_WORDS.get(0), 2L))
          .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(27L)))
          .addElements(
              event(FIRST_WIN_WORDS.get(1), 25L),
              event(FIRST_WIN_WORDS.get(2), 18L),
              event(FIRST_WIN_WORDS.get(3), 28L))
          .advanceWatermarkTo(START_TIME.plus(Duration.standardSeconds(65L)))
          .addElements(event(SECOND_WIN_WORDS.get(0), 61L), event(SECOND_WIN_WORDS.get(1), 63L))
          .advanceWatermarkToInfinity();
  String outputDirPath = getOutputDirPath("streamTest");

  PCollection<KV<Text, LongWritable>> dataToWrite =
      pipeline
          .apply(stringsStream)
          .apply(Window.into(FixedWindows.of(WINDOW_DURATION)))
          .apply(ParDo.of(new ConvertToLowerCaseFn()))
          .apply(new WordCount.CountWords())
          .apply(
              "ConvertToHadoopFormat",
              ParDo.of(new ConvertToHadoopFormatFn<>(KV_STR_INT_2_TXT_LONGWRITABLE)))
          .setTypeDescriptor(
              TypeDescriptors.kvs(
                  new TypeDescriptor<Text>() {}, new TypeDescriptor<LongWritable>() {}));

  ConfigTransform<Text, LongWritable> configurationTransformation =
      new ConfigTransform<>(outputDirPath, Text.class, LongWritable.class);
  dataToWrite.apply(
      HadoopFormatIO.<Text, LongWritable>write()
          .withConfigurationTransform(configurationTransformation)
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  pipeline.run();

  Map<String, Long> values = loadWrittenDataAsMap(outputDirPath);
  MatcherAssert.assertThat(
      values.entrySet(), equalTo(computeWordCounts(FIRST_WIN_WORDS).entrySet()));
  Assert.assertEquals(
      "Lock folder should not contain any files",
      0,
      new File(getLocksDirPath()).list().length);
}
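The anonymous subclasses such as new TypeDescriptor<Text>() {} are the standard idiom for capturing a generic type argument at runtime; a minimal standalone sketch of the same idiom (hypothetical, not part of the test above):

// The anonymous subclass preserves the type arguments that a plain
// TypeDescriptor.of(KV.class) would lose to erasure.
TypeDescriptor<KV<Text, LongWritable>> kvType =
    TypeDescriptors.kvs(new TypeDescriptor<Text>() {}, new TypeDescriptor<LongWritable>() {});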
Use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.
The class Create, method getDefaultCreateCoder.
private static <T> Coder<T> getDefaultCreateCoder(
    CoderRegistry coderRegistry, SchemaRegistry schemaRegistry, Iterable<T> elems)
    throws CannotProvideCoderException {
  checkArgument(
      !Iterables.isEmpty(elems),
      "Can not determine a default Coder for a 'Create' PTransform that "
          + "has no elements. Either add elements, call Create.empty(Coder),"
          + " Create.empty(TypeDescriptor), or call 'withCoder(Coder)' or "
          + "'withType(TypeDescriptor)' on the PTransform.");

  // First try to deduce a coder using the types of the elements.
  Class<?> elementClazz = Void.class;
  for (T elem : elems) {
    if (elem == null) {
      continue;
    }
    Class<?> clazz = elem.getClass();
    if (elementClazz.equals(Void.class)) {
      elementClazz = clazz;
    } else if (!elementClazz.equals(clazz)) {
      // Elements are not the same type, require a user-specified coder.
      throw new CannotProvideCoderException(
          String.format(
              "Cannot provide coder for %s: The elements are not all of the same class.",
              Create.class.getSimpleName()));
    }
  }

  @SuppressWarnings("unchecked")
  TypeDescriptor<T> typeDescriptor = (TypeDescriptor<T>) TypeDescriptor.of(elementClazz);
  if (elementClazz.getTypeParameters().length == 0) {
    try {
      Coder<T> coder =
          SchemaCoder.of(
              schemaRegistry.getSchema(typeDescriptor),
              typeDescriptor,
              schemaRegistry.getToRowFunction(typeDescriptor),
              schemaRegistry.getFromRowFunction(typeDescriptor));
      return coder;
    } catch (NoSuchSchemaException e) {
      // No schema.
    }
    try {
      // elementClazz is a wildcard type
      @SuppressWarnings("unchecked")
      Coder<T> coder = (Coder<T>) coderRegistry.getCoder(typeDescriptor);
      return coder;
    } catch (CannotProvideCoderException exc) {
      // Can't get a coder from the class of the elements, try from elements next.
    }
  }
  // If that fails, try to deduce a coder using the elements themselves.
  return (Coder<T>) inferCoderFromObjects(coderRegistry, schemaRegistry, elems);
}
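As a usage illustration (a hypothetical pipeline fragment, not from the Beam sources): elements of a single non-generic class let Create infer a coder, while a generic element type needs an explicit coder:

// Inferred: String has no type parameters, so the CoderRegistry lookup succeeds.
PCollection<String> words = pipeline.apply(Create.of("a", "b", "c"));

// Explicit: List<String> is generic, so supply the coder to avoid
// CannotProvideCoderException.
PCollection<List<String>> lists =
    pipeline.apply(
        Create.of(Collections.singletonList(Arrays.asList("a", "b")))
            .withCoder(ListCoder.of(StringUtf8Coder.of())));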
Use of org.apache.beam.sdk.values.TypeDescriptor in project beam by apache.
The class DoFnSignatures, method analyzeProcessElementMethod.
@VisibleForTesting
static DoFnSignature.ProcessElementMethod analyzeProcessElementMethod(
    ErrorReporter errors, TypeDescriptor<? extends DoFn<?, ?>> fnClass, Method m,
    TypeDescriptor<?> inputT, TypeDescriptor<?> outputT, FnAnalysisContext fnContext) {
  errors.checkArgument(
      void.class.equals(m.getReturnType())
          || DoFn.ProcessContinuation.class.equals(m.getReturnType()),
      "Must return void or %s",
      format(DoFn.ProcessContinuation.class));

  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  boolean requiresStableInput = m.isAnnotationPresent(DoFn.RequiresStableInput.class);
  boolean requiresTimeSortedInput = m.isAnnotationPresent(DoFn.RequiresTimeSortedInput.class);
  TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnClass, m);

  Type[] params = m.getGenericParameterTypes();
  for (int i = 0; i < params.length; ++i) {
    Parameter extraParam =
        analyzeExtraParameter(
            errors.forMethod(DoFn.ProcessElement.class, m),
            fnContext,
            methodContext,
            ParameterDescription.of(
                m, i, fnClass.resolveType(params[i]),
                Arrays.asList(m.getParameterAnnotations()[i])),
            inputT,
            outputT);
    methodContext.addParameter(extraParam);
  }

  int schemaElementIndex = 0;
  for (int i = 0; i < methodContext.getExtraParameters().size(); ++i) {
    Parameter parameter = methodContext.getExtraParameters().get(i);
    if (parameter instanceof SchemaElementParameter) {
      SchemaElementParameter schemaParameter = (SchemaElementParameter) parameter;
      schemaParameter = schemaParameter.toBuilder().setIndex(schemaElementIndex).build();
      methodContext.setParameter(i, schemaParameter);
      ++schemaElementIndex;
    }
  }

  TypeDescriptor<?> trackerT =
      methodContext.findParameter(RestrictionTrackerParameter.class)
          .map(p -> p.trackerT())
          .orElse(null);
  TypeDescriptor<?> watermarkEstimatorT =
      methodContext.findParameter(WatermarkEstimatorParameter.class)
          .map(p -> p.estimatorT())
          .orElse(null);

  // The allowed parameters depend on whether this DoFn is splittable.
  if (trackerT != null) {
    for (Parameter parameter : methodContext.getExtraParameters()) {
      checkParameterOneOf(errors, parameter, ALLOWED_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS);
    }
  } else {
    for (Parameter parameter : methodContext.getExtraParameters()) {
      checkParameterOneOf(errors, parameter, ALLOWED_NON_SPLITTABLE_PROCESS_ELEMENT_PARAMETERS);
    }
  }

  return DoFnSignature.ProcessElementMethod.create(
      m, methodContext.getExtraParameters(), requiresStableInput, requiresTimeSortedInput,
      trackerT, watermarkEstimatorT, windowT,
      DoFn.ProcessContinuation.class.equals(m.getReturnType()));
}
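For reference, a minimal sketch of the two @ProcessElement shapes this analysis distinguishes (a hypothetical DoFn pair, not from the Beam sources): without a RestrictionTracker parameter, trackerT stays null and the non-splittable parameter list applies; with one, the DoFn is splittable and may return ProcessContinuation:

static class PlainFn extends DoFn<String, Integer> {
  // Non-splittable: void return, no RestrictionTracker parameter.
  @ProcessElement
  public void processElement(@Element String word, OutputReceiver<Integer> out) {
    out.output(word.length());
  }
}

static class SplittableFn extends DoFn<String, String> {
  // OffsetRange provides a default tracker, so no @NewTracker method is needed.
  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element String element) {
    return new OffsetRange(0, element.length());
  }

  // Splittable: the RestrictionTracker parameter makes trackerT non-null,
  // and the method may return ProcessContinuation.
  @ProcessElement
  public ProcessContinuation processElement(
      @Element String element,
      RestrictionTracker<OffsetRange, Long> tracker,
      OutputReceiver<String> out) {
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      out.output(String.valueOf(element.charAt((int) i)));
    }
    return ProcessContinuation.stop();
  }
}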