Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class JsonStructuredRecordDatumReader, method decodeUnion.
/**
 * Decodes a value described by a union schema. The next JSON token is peeked and the first
 * member schema whose type maps to that token in {@code SCHEMA_TO_JSON_TYPE} is used to decode.
 *
 * @param decoder decoder from which the underlying {@link JsonReader} is obtained
 * @param unionSchema the union schema whose member schemas are the candidates
 * @return the value decoded with the first matching member schema
 * @throws IOException if no member schema matches the next token, or if peeking/decoding fails
 */
@Override
protected Object decodeUnion(Decoder decoder, Schema unionSchema) throws IOException {
  JsonToken nextToken = getJsonReader(decoder).peek();

  // The upcoming token is the only hint used to pick a member schema; first match wins.
  Schema matched = null;
  for (Schema candidate : unionSchema.getUnionSchemas()) {
    if (SCHEMA_TO_JSON_TYPE.get(candidate.getType()) == nextToken) {
      matched = candidate;
      break;
    }
  }

  if (matched == null) {
    throw new IOException(String.format("No matching schema found for union type: %s for token: %s", unionSchema, nextToken));
  }
  return decode(decoder, matched);
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class JsonStructuredRecordDatumReader, method decodeRecord.
/**
 * Decodes a JSON object into a {@link StructuredRecord} of the given schema. Each JSON member
 * whose name is a field of the schema is decoded with that field's schema; members whose name
 * is not in the schema are skipped.
 *
 * @param decoder decoder from which the underlying {@link JsonReader} is obtained
 * @param schema the record schema used to look up fields and build the record
 * @return the record built from the recognized members
 * @throws IOException if reading from the JSON reader or decoding a field value fails
 */
@Override
protected StructuredRecord decodeRecord(Decoder decoder, Schema schema) throws IOException {
  StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
  JsonReader reader = getJsonReader(decoder);

  reader.beginObject();
  while (reader.peek() != JsonToken.END_OBJECT) {
    String memberName = reader.nextName();
    Schema.Field knownField = schema.getField(memberName);
    if (knownField != null) {
      Object value = decode(decoder, knownField.getSchema());
      recordBuilder.set(knownField.getName(), value);
    } else {
      // Not part of the schema: consume the value so the reader stays positioned correctly.
      reader.skipValue();
    }
  }
  reader.endObject();

  return recordBuilder.build();
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class AvroRecordFormatTest, method testFlatRecord.
/**
 * Reads an Avro-encoded flat record (primitives, array, map and two nullable strings) through
 * the AVRO record format and checks every field of the resulting {@link StructuredRecord}.
 */
@Test
public void testFlatRecord() throws Exception {
  // One field per supported flat type; the two nullable fields cover the null and non-null cases.
  Schema schema = Schema.recordOf(
    "record",
    Schema.Field.of("int", Schema.of(Schema.Type.INT)),
    Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
    Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
    Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
    Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
    Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
    Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
    Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))));
  FormatSpecification formatSpecification =
    new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());

  // Build the input record against the Avro schema converted from the schema above.
  org.apache.avro.Schema avroSchema = convertSchema(schema);
  GenericRecord record = new GenericRecordBuilder(avroSchema)
    .set("int", Integer.MAX_VALUE)
    .set("long", Long.MAX_VALUE)
    .set("boolean", false)
    .set("bytes", Charsets.UTF_8.encode("hello world"))
    .set("double", Double.MAX_VALUE)
    .set("float", Float.MAX_VALUE)
    .set("string", "foo bar")
    .set("array", Lists.newArrayList(1, 2, 3))
    .set("map", ImmutableMap.of("k1", 1, "k2", 2))
    .set("nullable", null)
    .set("nullable2", "Hello")
    .build();

  RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpecification);
  StreamEvent event = toStreamEvent(record);
  StructuredRecord actual = format.read(event);

  // Primitive fields
  Assert.assertEquals(Integer.MAX_VALUE, actual.get("int"));
  Assert.assertEquals(Long.MAX_VALUE, actual.get("long"));
  Assert.assertFalse((Boolean) actual.get("boolean"));
  Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) actual.get("bytes")));
  Assert.assertEquals(Double.MAX_VALUE, actual.get("double"));
  Assert.assertEquals(Float.MAX_VALUE, actual.get("float"));
  Assert.assertEquals("foo bar", actual.get("string"));

  // Collection fields
  Assert.assertEquals(Lists.newArrayList(1, 2, 3), actual.get("array"));
  assertMapEquals(ImmutableMap.<String, Object>of("k1", 1, "k2", 2), (Map<Object, Object>) actual.get("map"));

  // Nullable fields: one set to null, one set to a value
  Assert.assertNull(actual.get("nullable"));
  Assert.assertEquals("Hello", actual.get("nullable2"));
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class CombinedLogRecordFormat, method validateSchema.
/**
 * Checks that every field of the desired schema is either a simple type or a nullable simple
 * type, as reported by {@code Schema.Type.isSimpleType()} and {@code Schema.isNullableSimple()}.
 *
 * @param desiredSchema the schema whose fields are checked
 * @throws UnsupportedTypeException on the first field that is neither simple nor nullable simple
 */
@Override
protected void validateSchema(Schema desiredSchema) throws UnsupportedTypeException {
  // A valid schema is a record whose fields are all simple (or nullable simple) types.
  for (Schema.Field field : desiredSchema.getFields()) {
    Schema fieldSchema = field.getSchema();
    boolean simple = fieldSchema.getType().isSimpleType();
    boolean nullableSimple = fieldSchema.isNullableSimple();
    if (!(simple || nullableSimple)) {
      throw new UnsupportedTypeException("Field " + field.getName() + " is of invalid type.");
    }
  }
}
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
From the class GrokRecordFormat, method read.
/**
 * Matches the event body (decoded as UTF-8) against the grok pattern and builds a record from
 * the captured values. For each schema field with a non-null capture of the same name, the
 * capture's string form is passed to {@code convertAndSet}; other fields are left unset.
 *
 * @param event the stream event whose body is matched
 * @return the record built from the captured values
 * @throws UnexpectedFormatException declared by the record format contract
 */
@Override
public StructuredRecord read(StreamEvent event) throws UnexpectedFormatException {
  String body = Bytes.toString(event.getBody(), Charsets.UTF_8);
  StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);

  Match match = grok.match(body);
  match.captures();
  Map<String, Object> captured = match.toMap();

  for (Schema.Field field : schema.getFields()) {
    String name = field.getName();
    Object raw = captured.get(name);
    if (raw == null) {
      // Nothing captured under this field name; leave the field unset.
      continue;
    }
    recordBuilder.convertAndSet(name, raw.toString());
  }
  return recordBuilder.build();
}
Aggregations