Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
Class GrokRecordFormat, method validateSchema.
/**
 * Validates that the desired schema only contains field types this format accepts.
 *
 * Every field must be a simple type or a nullable simple type. The single exception is the
 * very last field, which is also accepted when {@code isStringArray} returns true for it.
 *
 * @param desiredSchema the schema whose fields are checked
 * @throws UnsupportedTypeException naming the first field that is of an unaccepted type
 */
@Override
protected void validateSchema(Schema desiredSchema) throws UnsupportedTypeException {
  for (Iterator<Schema.Field> remaining = desiredSchema.getFields().iterator(); remaining.hasNext(); ) {
    Schema.Field current = remaining.next();
    Schema fieldSchema = current.getSchema();

    // simple and nullable-simple fields are accepted at any position
    if (fieldSchema.getType().isSimpleType() || fieldSchema.isNullableSimple()) {
      continue;
    }

    // anything else is only accepted as a string array in the final position
    boolean isLastField = !remaining.hasNext();
    if (!(isLastField && isStringArray(fieldSchema))) {
      throw new UnsupportedTypeException("Field " + current.getName() + " is of invalid type.");
    }
  }
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
Class RecordPutTransformer, method toPut.
/**
 * Converts a structured record into a {@link Put}.
 *
 * The put is created from the record's key field via {@code createPut}; every other field is
 * then written with {@code setField}, except fields that are missing from {@code outputSchema}
 * when an output schema is set.
 *
 * @param record the record to convert; its schema must be of type RECORD and contain the key field
 * @return the put populated from the record
 * @throws IllegalArgumentException if the schema is not a record or no key field is found
 */
public Put toPut(StructuredRecord record) {
  Schema recordSchema = record.getSchema();
  Preconditions.checkArgument(recordSchema.getType() == Schema.Type.RECORD, "input must be a record.");

  Schema.Field keyField = getKeyField(recordSchema);
  Preconditions.checkArgument(keyField != null, "Could not find key field in record.");

  Put output = createPut(record, keyField);
  String keyFieldName = keyField.getName();
  for (Schema.Field field : recordSchema.getFields()) {
    String fieldName = field.getName();
    // the key field is consumed by createPut and is not written again as a column
    if (fieldName.equals(keyFieldName)) {
      continue;
    }
    // with no output schema every field is kept; otherwise only fields the output schema declares
    if (outputSchema == null || outputSchema.getField(fieldName) != null) {
      setField(output, field, record.get(fieldName));
    }
  }
  return output;
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
Class StructuredRecordDatumReader, method read.
/**
 * Reads a datum, keeping {@code currentSchema} in step with the Avro schema for union types.
 *
 * Non-union schemas are delegated to the superclass unchanged. For a union, the branch index
 * is read from the decoder, {@code currentSchema} is switched to that branch of the union while
 * the value is read, and the previous {@code currentSchema} is restored afterwards, even if
 * reading fails.
 *
 * @param old value passed through to the underlying read
 * @param expected the Avro schema of the datum to read
 * @param in decoder to read from
 * @return the datum that was read
 * @throws IOException if reading from the decoder fails
 */
@Override
protected Object read(Object old, org.apache.avro.Schema expected, ResolvingDecoder in) throws IOException {
  if (expected.getType() == org.apache.avro.Schema.Type.UNION) {
    final Schema unionSchema = currentSchema;
    try {
      int branch = in.readIndex();
      currentSchema = unionSchema.getUnionSchema(branch);
      return read(old, expected.getTypes().get(branch), in);
    } finally {
      // always put the union schema back so the caller continues with the schema it had
      currentSchema = unionSchema;
    }
  }
  return super.read(old, expected, in);
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
Class StructuredRecordDatumReader, method readRecord.
/**
 * Reads a record from the decoder into a {@link StructuredRecord} built against
 * {@code currentSchema}.
 *
 * Fields are read in the order reported by the decoder. While each field is read,
 * {@code currentSchema} is switched to that field's schema (looked up by name through
 * {@code getFieldSchema}) and switched back once the field is done, even if reading fails.
 *
 * @param old unused; each field is read with a {@code null} previous value
 * @param expected the Avro record schema; not consulted directly here
 * @param in decoder to read from
 * @return the built {@link StructuredRecord}
 * @throws IOException if reading from the decoder fails
 */
@Override
protected Object readRecord(Object old, org.apache.avro.Schema expected, ResolvingDecoder in) throws IOException {
  // currentSchema is restored after every field, so the record schema can be captured once
  final Schema recordSchema = currentSchema;
  StructuredRecord.Builder recordBuilder = StructuredRecord.builder(recordSchema);

  for (org.apache.avro.Schema.Field avroField : in.readFieldOrder()) {
    String fieldName = avroField.name();
    try {
      currentSchema = getFieldSchema(fieldName, recordSchema);
      Object fieldValue = read(null, avroField.schema(), in);
      recordBuilder.set(fieldName, fieldValue);
    } finally {
      currentSchema = recordSchema;
    }
  }
  return recordBuilder.build();
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
Class FlowletProgramRunner, method outputEmitterFactory.
/**
 * Builds the factory that creates an {@link OutputEmitter} for a named output of a flowlet.
 *
 * For each requested output, the factory generates the schema of the emitted type, looks through
 * the queue specifications of the flowlet for queues whose simple name equals the output name and
 * whose output schema equals the generated schema, and creates a {@link ProducerSupplier} for the
 * matching queue. The supplier is also added to {@code producerBuilder}.
 *
 * @param flowletContext context used to obtain the program metrics and the flowlet id
 * @param flowletName name of the flowlet whose queue specifications are searched
 * @param queueClientFactory passed to each created {@link ProducerSupplier}
 * @param producerBuilder collects every {@link ProducerSupplier} created by the factory
 * @param queueSpecs queue specifications; the row of the flowlet's node is consulted
 * @return a factory creating emitters for the flowlet's outputs
 */
private OutputEmitterFactory outputEmitterFactory(final BasicFlowletContext flowletContext,
                                                  final String flowletName,
                                                  final QueueClientFactory queueClientFactory,
                                                  final ImmutableList.Builder<ProducerSupplier> producerBuilder,
                                                  final Table<Node, String, Set<QueueSpecification>> queueSpecs) {
  return new OutputEmitterFactory() {
    /**
     * Creates the emitter for one output.
     *
     * @throws IllegalArgumentException if no queue specification matches the output name and schema
     */
    @Override
    public <T> OutputEmitter<T> create(String outputName, TypeToken<T> type) {
      try {
        // first iterate over all queue specifications to find the queue name and all consumer flowlet ids
        QueueName queueName = null;
        List<String> consumerFlowlets = Lists.newLinkedList();
        Node flowlet = Node.flowlet(flowletName);
        Schema schema = schemaGenerator.generate(type.getType());
        for (Map.Entry<String, Set<QueueSpecification>> entry : queueSpecs.row(flowlet).entrySet()) {
          for (QueueSpecification queueSpec : entry.getValue()) {
            if (queueSpec.getQueueName().getSimpleName().equals(outputName)
              && queueSpec.getOutputSchema().equals(schema)) {
              queueName = queueSpec.getQueueName();
              consumerFlowlets.add(entry.getKey());
              // one match per consumer is enough; continue with the next consumer entry
              break;
            }
          }
        }
        if (queueName == null) {
          throw new IllegalArgumentException(
            String.format("No queue specification found for %s, %s", flowletName, type));
        }

        // create a metric collector for this queue, and also one for each consumer flowlet
        final MetricsContext metrics = flowletContext.getProgramMetrics()
          .childContext(Constants.Metrics.Tag.FLOWLET_QUEUE, outputName);
        final MetricsContext producerMetrics =
          metrics.childContext(Constants.Metrics.Tag.PRODUCER, flowletContext.getFlowletId());
        // Iterables.transform is a lazy view that would re-create every consumer context on each
        // iteration (i.e. on every enqueue); copy it once so the contexts are built a single time.
        final List<MetricsContext> consumerMetrics = ImmutableList.copyOf(
          Iterables.transform(consumerFlowlets, new Function<String, MetricsContext>() {
            @Override
            public MetricsContext apply(String consumer) {
              return producerMetrics.childContext(Constants.Metrics.Tag.CONSUMER, consumer);
            }
          }));

        // create a queue metrics emitter that emit to all of the above collectors
        ProducerSupplier producerSupplier = new ProducerSupplier(queueName, queueClientFactory, new QueueMetrics() {
          @Override
          public void emitEnqueue(int count) {
            metrics.increment("process.events.out", count);
            for (MetricsContext collector : consumerMetrics) {
              collector.increment("queue.pending", count);
            }
          }

          @Override
          public void emitEnqueueBytes(int bytes) {
            // no-op
          }
        });
        producerBuilder.add(producerSupplier);
        return new DatumOutputEmitter<>(producerSupplier, schema, datumWriterFactory.create(type, schema));
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  };
}
Aggregations