use of co.cask.cdap.api.annotation.ProcessInput in project cdap by caskdata.
the class WordSplitter method process.
/**
 * Splits the body of a stream event into words, emits every non-empty word,
 * records aggregate word statistics, and finally emits the full list of words.
 */
@ProcessInput
public void process(StreamEvent event) {
  // Decode the event body as UTF-8 text and break it apart on runs of whitespace
  String text = Charset.forName("UTF-8").decode(event.getBody()).toString();
  String[] tokens = text.split("\\s+");
  List<String> cleanedWords = new ArrayList<>(tokens.length);
  long totalLength = 0;
  for (String token : tokens) {
    // Keep only ASCII letters; a token that ends up empty is dropped entirely
    String cleaned = token.replaceAll("[^A-Za-z]", "");
    if (cleaned.isEmpty()) {
      continue;
    }
    // Every surviving word is emitted on its own and also collected for the list output
    wordOutput.emit(cleaned);
    cleanedWords.add(cleaned);
    totalLength += cleaned.length();
  }
  // Exactly one list entry is added per counted word, so the list size is the word count
  long totalWords = cleanedWords.size();
  // Update the running totals (combined word length and number of words seen)
  this.wordStatsTable.increment(
    new Increment("totals")
      .add("total_length", totalLength)
      .add("total_words", totalWords));
  // Hand the complete list of words to the associater
  wordListOutput.emit(cleanedWords);
}
use of co.cask.cdap.api.annotation.ProcessInput in project cdap by caskdata.
the class ProcessMethodExtractor method visit.
/**
 * Validates a method annotated with {@link ProcessInput} or {@link Tick} and, for process
 * methods, registers the method's input type under each of its input names in {@code inputTypes}.
 * Methods carrying neither annotation, and methods already recorded in {@code seenMethods},
 * are ignored.
 */
@Override
public void visit(Object instance, Type inspectType, Type declareType, Method method) throws Exception {
  if (!seenMethods.add(FlowletMethod.create(method, inspectType))) {
    // This method was already recorded in seenMethods (presumably an override that was
    // visited earlier on a subclass), so there is nothing more to do for it.
    return;
  }
  ProcessInput processInputAnnotation = method.getAnnotation(ProcessInput.class);
  Tick tickAnnotation = method.getAnnotation(Tick.class);
  TypeToken<?> inspectTypeToken = TypeToken.of(inspectType);
  boolean hasProcessInput = processInputAnnotation != null;
  boolean hasTick = tickAnnotation != null;
  if (!hasProcessInput && !hasTick) {
    return;
  }
  // Name of the inspected class, used in every validation message below
  String ownerName = inspectTypeToken.getRawType().getName();

  // Tick methods are only validated; they contribute no input types
  if (hasTick) {
    checkArgument(!hasProcessInput, "Tick method %s.%s should not have ProcessInput.", ownerName, method);
    checkArgument(method.getParameterTypes().length == 0, "Tick method %s.%s cannot have parameters.", ownerName, method);
    return;
  }

  Type[] methodParams = method.getGenericParameterTypes();
  int paramCount = methodParams.length;
  checkArgument(paramCount >= 1 && paramCount <= 2, "Parameter missing from process method %s.%s.", ownerName, method);
  // A second parameter, when present, has to be of InputContext type
  if (paramCount == 2) {
    Class<?> secondParamClass = TypeToken.of(methodParams[1]).getRawType();
    checkArgument(InputContext.class.equals(secondParamClass), "Second parameter must be InputContext type for process method %s.%s.", ownerName, method);
  }

  // The input type is derived from the (resolved) first parameter of the process method
  Type resolvedFirstParam = inspectTypeToken.resolveType(methodParams[0]).getType();
  Type inputType = getInputType(inspectTypeToken, method, resolvedFirstParam);
  checkArgument(Reflections.isResolved(inputType), "Invalid type in %s.%s. Only Class or ParameterizedType are supported.", ownerName, method);

  // With no names declared on the annotation, the method accepts any input
  String[] declaredNames = processInputAnnotation.value();
  String[] inputNames = (declaredNames.length == 0)
    ? new String[] { FlowletDefinition.ANY_INPUT }
    : declaredNames;

  for (String inputName : inputNames) {
    Set<Type> types = inputTypes.get(inputName);
    if (types == null) {
      types = new HashSet<>();
      inputTypes.put(inputName, types);
    }
    // The same type may be registered only once per input name
    boolean newlyAdded = types.add(inputType);
    checkArgument(newlyAdded, "Same type already defined for the same input name %s in process method %s.%s.", inputName, ownerName, method);
  }
}
use of co.cask.cdap.api.annotation.ProcessInput in project cdap by caskdata.
the class PurchaseStore method process.
/**
 * Enriches a purchase with its catalog id when the CatalogLookup service can be found and
 * returns one, counts the purchase per customer, logs it, and writes it to the store under
 * a random UUID key.
 */
@ProcessInput
public void process(Purchase purchase) {
  // Locate the CatalogLookup service through service discovery; the service name matches
  // the one provided in the Application configure method
  URL catalogServiceUrl = getContext().getServiceURL(PurchaseApp.APP_NAME, CatalogLookupService.SERVICE_NAME);
  // The lookup is only attempted when the service URL is available
  String catalogId = (catalogServiceUrl == null)
    ? null
    : getCatalogId(catalogServiceUrl, purchase.getProduct());
  if (catalogId != null) {
    purchase.setCatalogId(catalogId);
  }

  metrics.count("purchases." + purchase.getCustomer(), 1);
  LOG.info("Purchase info: Customer {}, ProductId {}, CatalogId {}", purchase.getCustomer(), purchase.getProduct(), purchase.getCatalogId());

  // Each purchase is stored under a freshly generated random key
  byte[] rowKey = Bytes.toBytes(UUID.randomUUID());
  store.write(rowKey, purchase);
}
use of co.cask.cdap.api.annotation.ProcessInput in project cdap by caskdata.
the class FlowletProgramRunner method createProcessSpecification.
/**
* Creates all {@link ProcessSpecification} for the process methods of the flowlet class.
* Every non-synthetic, non-bridge method declared on the flowlet class or one of its
* superclasses (excluding {@link Object}) that is annotated with {@link ProcessInput} or
* {@link Tick} yields at most one specification.
*
* @param flowletContext Context of the flowlet; passed to the consumer config and batch size lookups and
*                       used to identify the flowlet in the error message when no inputs are found.
* @param flowletType Type of the flowlet class represented by {@link TypeToken}.
* @param processMethodFactory A {@link ProcessMethodFactory} for creating {@link ProcessMethod}.
* @param processSpecFactory A {@link ProcessSpecificationFactory} for creating {@link ProcessSpecification}.
* @param result A {@link Collection} for storing newly created {@link ProcessSpecification}.
* @return The same {@link Collection} as the {@code result} parameter.
* @throws Exception if a specification cannot be created; in particular the final
*                   {@code Preconditions.checkArgument} fails when {@code result} is still empty.
*/
@SuppressWarnings("unchecked")
private <T extends Collection<ProcessSpecification<?>>> T createProcessSpecification(BasicFlowletContext flowletContext, TypeToken<? extends Flowlet> flowletType, ProcessMethodFactory processMethodFactory, ProcessSpecificationFactory processSpecFactory, T result) throws Exception {
// Shared across the whole hierarchy walk so that a method is only handled the first time it is encountered
Set<FlowletMethod> seenMethods = Sets.newHashSet();
// Walk up the hierarchy of flowlet class to get all ProcessInput and Tick methods
for (TypeToken<?> type : flowletType.getTypes().classes()) {
// Stop once the walk reaches Object
if (type.getRawType().equals(Object.class)) {
break;
}
// Extracts all process and tick methods
for (Method method : type.getRawType().getDeclaredMethods()) {
// Synthetic and bridge methods are skipped
if (method.isSynthetic() || method.isBridge()) {
continue;
}
if (!seenMethods.add(FlowletMethod.create(method, flowletType.getType()))) {
// The method was already seen earlier in the walk (presumably an override declared on a
// subclass, since the walk goes up the class hierarchy), so skip it.
continue;
}
ProcessInput processInputAnnotation = method.getAnnotation(ProcessInput.class);
Tick tickAnnotation = method.getAnnotation(Tick.class);
if (processInputAnnotation == null && tickAnnotation == null) {
// Neither a process nor a tick method.
continue;
}
// When both annotations are present, the Tick annotation's maxRetries is the one used
int maxRetries = (tickAnnotation == null) ? processInputAnnotation.maxRetries() : tickAnnotation.maxRetries();
ProcessMethod processMethod = processMethodFactory.create(method, maxRetries);
Set<String> inputNames;
Schema schema;
TypeToken<?> dataType;
ConsumerConfig consumerConfig;
// Stays at 1 unless getBatchSize returns a non-null value below
int batchSize = 1;
if (tickAnnotation != null) {
// Tick method: no input names, a NULL schema and a void data type
inputNames = ImmutableSet.of();
consumerConfig = new ConsumerConfig(0, 0, 1, DequeueStrategy.FIFO, null);
schema = Schema.of(Schema.Type.NULL);
dataType = TypeToken.of(void.class);
} else {
inputNames = Sets.newHashSet(processInputAnnotation.value());
if (inputNames.isEmpty()) {
// If there is no input name, it would be ANY_INPUT
inputNames.add(FlowletDefinition.ANY_INPUT);
}
// The data type starts as the resolved type of the first parameter; in batch mode with an
// Iterator parameter it is replaced below by the Iterator's type argument, and the schema
// is generated from whichever type results.
dataType = flowletType.resolveType(method.getGenericParameterTypes()[0]);
consumerConfig = getConsumerConfig(flowletContext, method);
Integer processBatchSize = getBatchSize(method, flowletContext);
// A non-null batch size is what enables the batch handling in this block
if (processBatchSize != null) {
if (dataType.getRawType().equals(Iterator.class)) {
Preconditions.checkArgument(dataType.getType() instanceof ParameterizedType, "Only ParameterizedType is supported for batch Iterator.");
dataType = flowletType.resolveType(((ParameterizedType) dataType.getType()).getActualTypeArguments()[0]);
}
batchSize = processBatchSize;
}
try {
schema = schemaGenerator.generate(dataType.getType());
} catch (UnsupportedTypeException e) {
// Rethrown through Throwables.propagate, keeping the original exception
throw Throwables.propagate(e);
}
}
ProcessSpecification processSpec = processSpecFactory.create(inputNames, schema, dataType, processMethod, consumerConfig, batchSize, tickAnnotation);
// Add processSpec; a null result from the factory is skipped
if (processSpec != null) {
result.add(processSpec);
}
}
}
// At least one specification must be present in result, otherwise the flowlet is rejected
Preconditions.checkArgument(!result.isEmpty(), "No inputs found for flowlet '%s' of flow '%s' of application '%s' (%s)", flowletContext.getFlowletId(), flowletContext.getFlowId(), flowletContext.getApplicationId(), flowletType);
return result;
}
use of co.cask.cdap.api.annotation.ProcessInput in project cdap by caskdata.
the class FlowUtils method addConsumerGroup.
/**
 * Adds to {@code groupConfigs} one consumer group configuration for every process method of
 * the given flowlet type whose input schema and input name match the given queue.
 */
private static void addConsumerGroup(final QueueSpecification queueSpec, final Type flowletType, final long groupId, final int groupSize, final SchemaGenerator schemaGenerator, final Collection<ConsumerGroupConfig> groupConfigs) {
  final Set<FlowletMethod> visitedMethods = Sets.newHashSet();
  Reflections.visit(null, flowletType, new MethodVisitor() {
    @Override
    public void visit(Object instance, Type inspectType, Type declareType, Method method) throws Exception {
      FlowletMethod flowletMethod = FlowletMethod.create(method, inspectType);
      boolean firstVisit = visitedMethods.add(flowletMethod);
      if (!firstVisit) {
        // Already handled this method (presumably an override visited earlier on a subclass)
        return;
      }

      // Only process methods can be consumers
      ProcessInput processInputAnnotation = method.getAnnotation(ProcessInput.class);
      if (processInputAnnotation == null) {
        return;
      }

      // A process method with no declared input names consumes from any input
      Set<String> inputNames = Sets.newHashSet(processInputAnnotation.value());
      if (inputNames.isEmpty()) {
        inputNames.add(FlowletDefinition.ANY_INPUT);
      }

      TypeToken<?> inspectTypeToken = TypeToken.of(inspectType);
      TypeToken<?> dataType = inspectTypeToken.resolveType(method.getGenericParameterTypes()[0]);

      // In batch mode with an Iterator parameter, the actual data type is the Iterator's type argument
      boolean batchIterator = method.isAnnotationPresent(Batch.class) && Iterator.class.equals(dataType.getRawType());
      if (batchIterator) {
        Type iteratorType = dataType.getType();
        Preconditions.checkArgument(iteratorType instanceof ParameterizedType, "Only ParameterizedType is supported for batch Iterator.");
        Type elementType = ((ParameterizedType) iteratorType).getActualTypeArguments()[0];
        dataType = inspectTypeToken.resolveType(elementType);
      }

      Schema schema = schemaGenerator.generate(dataType.getType());
      // The queue's schema has to match before the input name is even looked at
      if (!queueSpec.getInputSchema().equals(schema)) {
        return;
      }
      boolean nameMatches = inputNames.contains(queueSpec.getQueueName().getSimpleName())
        || inputNames.contains(FlowletDefinition.ANY_INPUT);
      if (nameMatches) {
        groupConfigs.add(createConsumerGroupConfig(groupId, groupSize, method));
      }
    }
  });
}
Aggregations