Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class FlowletProgramRunner, method processSpecificationFactory.
private ProcessSpecificationFactory processSpecificationFactory(
    final BasicFlowletContext flowletContext, final DataFabricFacade dataFabricFacade,
    final QueueReaderFactory queueReaderFactory, final String flowletName,
    final Table<Node, String, Set<QueueSpecification>> queueSpecs,
    final ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder,
    final SchemaCache schemaCache) {
  return new ProcessSpecificationFactory() {
    @Override
    public <T> ProcessSpecification create(Set<String> inputNames, Schema schema, TypeToken<T> dataType,
                                           ProcessMethod<T> method, ConsumerConfig consumerConfig,
                                           int batchSize, Tick tickAnnotation) throws Exception {
      List<QueueReader<T>> queueReaders = Lists.newLinkedList();
      for (Map.Entry<Node, Set<QueueSpecification>> entry : queueSpecs.column(flowletName).entrySet()) {
        for (QueueSpecification queueSpec : entry.getValue()) {
          final QueueName queueName = queueSpec.getQueueName();
          if (queueSpec.getInputSchema().equals(schema)
              && (inputNames.contains(queueName.getSimpleName())
                  || inputNames.contains(FlowletDefinition.ANY_INPUT))) {
            Node sourceNode = entry.getKey();
            if (sourceNode.getType() == FlowletConnection.Type.STREAM) {
              ConsumerSupplier<StreamConsumer> consumerSupplier = ConsumerSupplier.create(
                flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig);
              queueConsumerSupplierBuilder.add(consumerSupplier);
              // No decoding is needed: a process method that consumes from a stream
              // can only take StreamEvent as its input type.
              Function<StreamEvent, T> decoder =
                wrapInputDecoder(flowletContext, null, queueName, new Function<StreamEvent, T>() {
                  @Override
                  @SuppressWarnings("unchecked")
                  public T apply(StreamEvent input) {
                    return (T) input;
                  }
                });
              queueReaders.add(queueReaderFactory.createStreamReader(queueName.toStreamId(),
                                                                     consumerSupplier, batchSize, decoder));
            } else {
              int numGroups = getNumGroups(Iterables.concat(queueSpecs.row(entry.getKey()).values()), queueName);
              Function<ByteBuffer, T> decoder =
                wrapInputDecoder(flowletContext,
                                 entry.getKey().getName(), // the producer flowlet
                                 queueName, createInputDatumDecoder(dataType, schema, schemaCache));
              ConsumerSupplier<QueueConsumer> consumerSupplier = ConsumerSupplier.create(
                flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade,
                queueName, consumerConfig, numGroups);
              queueConsumerSupplierBuilder.add(consumerSupplier);
              queueReaders.add(queueReaderFactory.createQueueReader(consumerSupplier, batchSize, decoder));
            }
          }
        }
      }
      // If inputs are required but no matching input queue was found, return null
      if (!inputNames.isEmpty() && queueReaders.isEmpty()) {
        return null;
      }
      return new ProcessSpecification<>(new RoundRobinQueueReader<>(queueReaders), method, tickAnnotation);
    }
  };
}
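The ProcessSpecification built above hands all matched readers to a RoundRobinQueueReader, so consumption is spread evenly across the input queues. A minimal sketch of that dispatch idea (illustrative only, not the CDAP-internal RoundRobinQueueReader):

// Illustrative sketch of round-robin dispatch over several queue readers.
// This is NOT the actual CDAP class; it only shows the cycling idea.
final class RoundRobinSketch<T> {
  private final List<QueueReader<T>> readers;
  private int next = 0;

  RoundRobinSketch(List<QueueReader<T>> readers) {
    this.readers = readers;
  }

  // Returns the next reader to poll, cycling through all of them in order.
  QueueReader<T> nextReader() {
    QueueReader<T> reader = readers.get(next);
    next = (next + 1) % readers.size();
    return reader;
  }
}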
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class FlowletProgramRunner, method createSchemaCache.
private SchemaCache createSchemaCache(Program program) throws Exception {
  ImmutableSet.Builder<Schema> schemas = ImmutableSet.builder();
  for (FlowSpecification flowSpec : program.getApplicationSpecification().getFlows().values()) {
    for (FlowletDefinition flowletDef : flowSpec.getFlowlets().values()) {
      schemas.addAll(Iterables.concat(flowletDef.getInputs().values()));
      schemas.addAll(Iterables.concat(flowletDef.getOutputs().values()));
    }
  }
  // Temp fix for ENG-3949: always add the old stream event schema.
  // TODO: Remove it later. The right thing to do is to store schema history so that schema
  // evolution is supported. By design, as long as the schema cache is populated with the old
  // schema, the type projection logic in the decoder handles it correctly.
  schemas.add(schemaGenerator.generate(StreamEventData.class));
  return new SchemaCache(schemas.build(), program.getClassLoader());
}
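The schemaGenerator field is not shown in this snippet. A plausible setup, assuming CDAP's reflection-based generator (an internal class, so treat this as an assumption rather than the actual wiring):

// Hedged sketch: deriving a Schema from a Java class via reflection.
// ReflectionSchemaGenerator is assumed here based on CDAP internals.
SchemaGenerator schemaGenerator = new ReflectionSchemaGenerator();
Schema streamEventSchema = schemaGenerator.generate(StreamEventData.class);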
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class StreamBatchSource, method transform.
@Override
public void transform(KeyValue<LongWritable, Object> input, Emitter<StructuredRecord> emitter) throws Exception {
  // if no format spec was given, the value is a StreamEvent
  if (Strings.isNullOrEmpty(streamBatchConfig.format)) {
    StreamEvent event = (StreamEvent) input.getValue();
    Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
    StructuredRecord output = StructuredRecord.builder(DEFAULT_SCHEMA)
      .set("ts", input.getKey().get())
      .set("headers", headers)
      .set("body", event.getBody())
      .build();
    emitter.emit(output);
  } else {
    // otherwise, it will be a GenericStreamEventData
    @SuppressWarnings("unchecked")
    GenericStreamEventData<StructuredRecord> event = (GenericStreamEventData<StructuredRecord>) input.getValue();
    StructuredRecord record = event.getBody();
    Schema inputSchema = record.getSchema();
    Schema outputSchema = schemaCache.get(inputSchema);
    // if we haven't seen this schema before, generate the output schema (add ts and headers fields)
    if (outputSchema == null) {
      List<Schema.Field> fields = Lists.newArrayList();
      fields.add(DEFAULT_SCHEMA.getField("ts"));
      fields.add(DEFAULT_SCHEMA.getField("headers"));
      fields.addAll(inputSchema.getFields());
      outputSchema = Schema.recordOf(inputSchema.getRecordName(), fields);
      schemaCache.put(inputSchema, outputSchema);
    }
    // easier to deal with an empty map than with nullables, so the headers field is non-nullable
    Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
    StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
    builder.set("ts", input.getKey().get());
    builder.set("headers", headers);
    for (Schema.Field field : inputSchema.getFields()) {
      String fieldName = field.getName();
      builder.set(fieldName, record.get(fieldName));
    }
    emitter.emit(builder.build());
  }
}
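DEFAULT_SCHEMA is referenced but not defined in this snippet. From the three fields set above ("ts", "headers", "body"), a hedged reconstruction looks like this; the record name "event" is an assumption:

// Hedged reconstruction of DEFAULT_SCHEMA, inferred from the fields used above.
private static final Schema DEFAULT_SCHEMA = Schema.recordOf(
  "event",
  Schema.Field.of("ts", Schema.of(Schema.Type.LONG)),
  Schema.Field.of("headers", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING))),
  Schema.Field.of("body", Schema.of(Schema.Type.BYTES)));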
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class LookupTransform, method transform.
@Override
public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) throws Exception {
  T lookedUpValue = lookup.lookup((String) input.get(config.lookupKey));
  // for the output schema, copy all the input fields and add the 'destinationField'
  List<Schema.Field> outFields = new ArrayList<>();
  for (Schema.Field field : input.getSchema().getFields()) {
    outFields.add(field);
  }
  if (lookedUpValue instanceof String) {
    outFields.add(Schema.Field.of(config.destinationField, Schema.of(Schema.Type.STRING)));
  } else if (lookedUpValue instanceof Row) {
    Row lookedupRow = (Row) lookedUpValue;
    for (byte[] column : lookedupRow.getColumns().keySet()) {
      outFields.add(Schema.Field.of(Bytes.toString(column), Schema.of(Schema.Type.STRING)));
    }
  } else {
    throw new IllegalArgumentException("Unexpected value type: " + lookedUpValue.getClass());
  }
  Schema outSchema = Schema.recordOf(input.getSchema().getRecordName(), outFields);
  // copy all the values
  StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outSchema);
  for (Schema.Field inField : input.getSchema().getFields()) {
    if (inField.getName().equals(config.lookupKey)) {
      if (lookedUpValue instanceof String) {
        outputBuilder.set(config.destinationField, lookedUpValue);
      } else {
        // due to the check above, we know it's a Row
        Row lookedupRow = (Row) lookedUpValue;
        for (Map.Entry<byte[], byte[]> entry : lookedupRow.getColumns().entrySet()) {
          outputBuilder.set(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue()));
        }
      }
    }
    // what if the destinationField already exists?
    outputBuilder.set(inField.getName(), input.get(inField.getName()));
  }
  emitter.emit(outputBuilder.build());
}
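The "// what if the destinationField already exists?" comment flags a real hazard: if the input schema already has a field named config.destinationField, the output record would carry a duplicate field name. One hedged way to resolve it, checked before the output schema is built (this guard is not in the original code):

// Hedged sketch: fail fast when the destination field collides with an input field.
if (input.getSchema().getField(config.destinationField) != null) {
  throw new IllegalArgumentException(
    "Input schema already contains a field named " + config.destinationField);
}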
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class DupeFlagger, method merge.
@Override
public StructuredRecord merge(StructuredRecord joinKey, Iterable<JoinElement<StructuredRecord>> joinRow) {
  StructuredRecord record = null;
  boolean containsDupe = false;
  for (JoinElement<StructuredRecord> element : joinRow) {
    if (element.getStageName().equals(config.keep)) {
      record = element.getInputRecord();
    } else {
      containsDupe = true;
    }
  }
  if (record == null) {
    // can only happen if 'keep' was a macro and did not evaluate to one of the inputs
    throw new IllegalArgumentException("No record for " + config.keep + " was found.");
  }
  Schema outputSchema = getOutputSchema(record.getSchema());
  StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outputSchema)
    .set(config.flagField, containsDupe);
  for (Schema.Field field : record.getSchema().getFields()) {
    outputBuilder.set(field.getName(), record.get(field.getName()));
  }
  return outputBuilder.build();
}