use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
the class SchemaTest method testCompatible.
/**
 * Checks {@link Schema#isCompatible} across reflection-generated schemas:
 * the Node schema must differ from the Node3 schema and be compatible with it,
 * the Node3 schema must not be compatible with the Node schema, and the Node3
 * schema must be compatible with the Node4 schema.
 *
 * @throws UnsupportedTypeException if schema generation rejects one of the classes
 */
@Test
public void testCompatible() throws UnsupportedTypeException {
  Schema nodeSchema = new ReflectionSchemaGenerator().generate(Node.class);
  Schema node3Schema = new ReflectionSchemaGenerator().generate(Node3.class);
  Schema node4Schema = new ReflectionSchemaGenerator().generate(Node4.class);

  // The two schemas are distinct ...
  Assert.assertNotEquals(nodeSchema, node3Schema);

  // ... and compatibility is expected in one direction only.
  Assert.assertTrue(nodeSchema.isCompatible(node3Schema));
  Assert.assertFalse(node3Schema.isCompatible(nodeSchema));

  Assert.assertTrue(node3Schema.isCompatible(node4Schema));
}
use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
the class SchemaTest method testParseJson.
/**
 * Round-trips a reflection-generated schema through its string form and
 * {@link Schema#parseJson}, expecting an equal schema back.
 *
 * @throws IOException if parsing the serialized schema fails
 * @throws UnsupportedTypeException if schema generation rejects the class
 */
@Test
public void testParseJson() throws IOException, UnsupportedTypeException {
  Schema original = new ReflectionSchemaGenerator().generate(Node.class);

  // Serialize via toString(), then parse the result back into a Schema.
  String serialized = original.toString();
  Schema reparsed = Schema.parseJson(serialized);

  Assert.assertEquals(original, reparsed);
}
use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
the class HiveExploreServiceStreamTest method createAvroEvent.
/**
 * Builds a generic Avro record from the given values and returns its binary encoding.
 *
 * <p>Values are assigned to the schema's fields positionally, in the order returned by
 * {@code schema.getFields()}.
 *
 * @param schema the Avro schema whose fields are populated
 * @param values one value per schema field, in field order
 * @return the binary-encoded record
 * @throws IOException if writing or flushing the encoded record fails
 * @throws IllegalArgumentException if {@code values} is null or its length differs from the
 *     number of fields in {@code schema}
 */
private byte[] createAvroEvent(org.apache.avro.Schema schema, Object... values) throws IOException {
  // Fail fast on a mismatch: too few values would otherwise surface as a bare
  // ArrayIndexOutOfBoundsException, and surplus values would be silently dropped.
  int fieldCount = schema.getFields().size();
  if (values == null || values.length != fieldCount) {
    throw new IllegalArgumentException(
      "Expected " + fieldCount + " value(s) to match the schema fields but got "
        + (values == null ? "null" : String.valueOf(values.length)));
  }

  GenericRecordBuilder builder = new GenericRecordBuilder(schema);
  int i = 0;
  for (org.apache.avro.Schema.Field field : schema.getFields()) {
    builder.set(field.name(), values[i]);
    i++;
  }
  GenericRecord record = builder.build();

  try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    writer.write(record, encoder);
    // The encoder buffers output; flush before reading the bytes back.
    encoder.flush();
    return out.toByteArray();
  }
}
use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
the class HiveExploreServiceStreamTest method testAvroFormattedStream.
/**
 * Creates a stream with an Avro format specification, sends five Avro-encoded purchase
 * events to it, and checks that a grouped, ordered aggregation query over the stream's
 * table returns the expected result schema and three rows (userZ, userX, userY).
 * The stream is dropped afterwards regardless of the outcome.
 *
 * @throws Exception if any stream, encoding, or query operation fails
 */
@Test
public void testAvroFormattedStream() throws Exception {
  StreamId streamId = NAMESPACE_ID.stream("avroStream");
  createStream(streamId);
  try {
    Schema purchaseSchema = Schema.recordOf(
      "purchase",
      Schema.Field.of("user", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("num", Schema.of(Schema.Type.INT)),
      Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
    FormatSpecification avroFormat =
      new FormatSpecification(Formats.AVRO, purchaseSchema, Collections.<String, String>emptyMap());
    StreamProperties streamProperties = new StreamProperties(Long.MAX_VALUE, avroFormat, 1000);
    setStreamProperties(NAMESPACE_ID.getNamespace(), "avroStream", streamProperties);

    // our schemas are compatible
    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(purchaseSchema.toString());

    // each entry is one purchase: user, num, price
    Object[][] purchases = {
      {"userX", 5, 3.14},
      {"userX", 10, 2.34},
      {"userY", 1, 1.23},
      {"userZ", 50, 45.67},
      {"userZ", 100, 98.76}
    };
    for (Object[] purchase : purchases) {
      sendStreamEvent(streamId, createAvroEvent(avroSchema, purchase));
    }

    // expected sum(price * num) per user
    double xTotal = 5 * 3.14 + 10 * 2.34;
    double yTotal = 1.23;
    double zTotal = 50 * 45.67 + 100 * 98.76;

    String query = "SELECT `user`, sum(num) as total_num, sum(price * num) as total_price "
      + "FROM " + getTableName(streamId)
      + " GROUP BY `user` ORDER BY total_price DESC";
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, query).get();
    Assert.assertTrue(result.hasNext());

    List<ColumnDesc> expectedColumns = Lists.newArrayList(
      new ColumnDesc("user", "STRING", 1, null),
      new ColumnDesc("total_num", "BIGINT", 2, null),
      new ColumnDesc("total_price", "DOUBLE", 3, null));
    Assert.assertEquals(expectedColumns, result.getResultSchema());

    // should get 3 rows, ordered by total price descending: userZ, userX, userY
    String[] expectedUsers = {"userZ", "userX", "userY"};
    Long[] expectedCounts = {150L, 15L, 1L};
    double[] expectedTotals = {zTotal, xTotal, yTotal};
    for (int row = 0; row < expectedUsers.length; row++) {
      List<Object> columns = result.next().getColumns();
      // toString b/c avro returns a utf8 object for strings
      Assert.assertEquals(expectedUsers[row], columns.get(0).toString());
      Assert.assertEquals(expectedCounts[row], columns.get(1));
      Assert.assertTrue(Math.abs(expectedTotals[row] - (Double) columns.get(2)) < 0.0000001);
    }

    // shouldn't be any more results
    Assert.assertFalse(result.hasNext());
  } finally {
    dropStream(streamId);
  }
}
use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
the class StreamSerDe method initialize.
/**
 * Sets up the object inspector and, when a context is available, the stream format and
 * deserializer for the stream named in the table properties.
 *
 * <p>Hive calls this method multiple times. Adding extra settings to {@code conf} here may
 * look convenient but is unsafe: when a query involves several Hive tables, initialize is
 * called for each table before input splits are fetched for any of them. Anything the input
 * format needs should instead be placed into the properties map by the StorageHandler; that
 * map is passed to this method and is also copied into the job conf for the input format.
 *
 * @param conf the configuration used to obtain the context
 * @param properties the table properties, including the stream name and namespace
 * @throws SerDeException if the schema is unsupported by the format, the stream config cannot
 *     be read, or the format cannot be created
 */
@Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
  // The columns property originates from the create table statement, via the Hive metastore.
  // The schema must therefore be accurate and in the same order that the object inspectors
  // will reflect it.
  final String streamName = properties.getProperty(Constants.Explore.STREAM_NAME);
  final String namespace = properties.getProperty(Constants.Explore.STREAM_NAMESPACE);

  // A null namespace is handled here so that a null pointer exception does not prevent
  // dropping a table.
  if (namespace == null) {
    // Hive still needs an ObjectInspector to check what columns the table has.
    this.inspector = new ObjectDeserializer(properties, null).getInspector();
    return;
  }

  StreamId streamId = new StreamId(namespace, streamName);
  try (ContextManager.Context context = ContextManager.getContext(conf)) {
    // The context may be null, because initialize is also called just to get the object
    // inspector; in that case the schema stays null.
    Schema bodySchema = null;
    if (context != null) {
      // Look up the stream's format from the stream config.
      FormatSpecification spec = getFormatSpec(properties, streamId, context);
      this.streamFormat = (AbstractStreamEventRecordFormat) RecordFormats.createInitializedFormat(spec);
      bodySchema = spec.getSchema();
    }
    this.deserializer = new ObjectDeserializer(properties, bodySchema, BODY_OFFSET);
    this.inspector = this.deserializer.getInspector();
  } catch (UnsupportedTypeException e) {
    // The schema should have been validated when it was set on the stream, so reaching
    // this point means something went wrong much earlier.
    LOG.error("Schema unsupported by format.", e);
    throw new SerDeException("Schema unsupported by format.", e);
  } catch (IOException e) {
    LOG.error("Could not get the config for stream {}.", streamName, e);
    throw new SerDeException("Could not get the config for stream " + streamName, e);
  } catch (Exception e) {
    LOG.error("Could not create the format for stream {}.", streamName, e);
    throw new SerDeException("Could not create the format for stream " + streamName, e);
  }
}
Aggregations