Example usage of org.apache.avro.generic.GenericDatumWriter in project h2o-3 (by h2oai): class AvroFileGenerator, method generatePrimitiveTypes.
/**
 * Generates an Avro file with {@code nrows} records, one column per Avro primitive
 * type (string, bytes, int, long, float, double, boolean, null), each populated
 * from the row index.
 *
 * @param filename name of the file to create inside a fresh temporary directory
 * @param nrows    number of records to append
 * @return the generated Avro file
 * @throws IOException if the file cannot be created or written
 */
public static File generatePrimitiveTypes(String filename, int nrows) throws IOException {
    File parentDir = Files.createTempDir();
    File f = new File(parentDir, filename);
    // One field per Avro primitive type, all required (no defaults).
    Schema schema = SchemaBuilder.builder().record("test_primitive_types").fields()
            .name("CString").type("string").noDefault()
            .name("CBytes").type("bytes").noDefault()
            .name("CInt").type("int").noDefault()
            .name("CLong").type("long").noDefault()
            .name("CFloat").type("float").noDefault()
            .name("CDouble").type("double").noDefault()
            .name("CBoolean").type("boolean").noDefault()
            .name("CNull").type("null").noDefault()
            .endRecord();
    // try-with-resources guarantees the writer (and underlying file) is closed
    // even if create/append fails part-way through.
    try (DataFileWriter<GenericRecord> dw =
                 new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
        dw.create(schema, f);
        for (int i = 0; i < nrows; i++) {
            GenericRecord gr = new GenericData.Record(schema);
            gr.put("CString", String.valueOf(i));
            gr.put("CBytes", ByteBuffer.wrap(StringUtils.toBytes(i)));
            gr.put("CInt", i);
            gr.put("CLong", Long.valueOf(i));
            gr.put("CFloat", Float.valueOf(i));
            gr.put("CDouble", Double.valueOf(i));
            gr.put("CBoolean", (i & 1) == 1); // odd rows true, even rows false
            gr.put("CNull", null);
            dw.append(gr);
        }
    }
    return f;
}
Example usage of org.apache.avro.generic.GenericDatumWriter in project h2o-3 (by h2oai): class AvroFileGenerator, method generateEnumTypes.
/**
 * Generates an Avro file with two enum columns: a required enum ("CEnum") and an
 * optional (nullable) enum ("CUEnum"). Values cycle through the supplied category
 * symbols; every (numOfCategories2 + 1)-th row stores null in the optional column.
 *
 * @param filename   name of the file to create inside a fresh temporary directory
 * @param nrows      number of records to append
 * @param categories exactly two arrays of enum symbols, one per enum column
 * @return the generated Avro file
 * @throws IOException if the file cannot be created or written
 */
public static File generateEnumTypes(String filename, int nrows, String[][] categories) throws IOException {
    assert categories.length == 2 : "Needs only 2 columns";
    File parentDir = Files.createTempDir();
    File f = new File(parentDir, filename);
    Schema enumSchema1 = SchemaBuilder.enumeration("CEnum1").symbols(categories[0]);
    Schema enumSchema2 = SchemaBuilder.enumeration("CEnum2").symbols(categories[1]);
    // "CUEnum" is declared optional, i.e. a union of null and the enum type.
    Schema schema = SchemaBuilder.builder().record("test_enum_types").fields()
            .name("CEnum").type(enumSchema1).noDefault()
            .name("CUEnum").type().optional().type(enumSchema2)
            .endRecord();
    int numOfCategories1 = categories[0].length;
    int numOfCategories2 = categories[1].length;
    // try-with-resources guarantees the writer (and underlying file) is closed
    // even if create/append fails part-way through.
    try (DataFileWriter<GenericRecord> dw =
                 new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>())) {
        dw.create(schema, f);
        for (int i = 0; i < nrows; i++) {
            GenericRecord gr = new GenericData.Record(schema);
            gr.put("CEnum", new GenericData.EnumSymbol(enumSchema1, categories[0][i % numOfCategories1]));
            // Null on every (numOfCategories2 + 1)-th row, otherwise a cycling symbol.
            gr.put("CUEnum", i % (numOfCategories2 + 1) == 0
                    ? null
                    : new GenericData.EnumSymbol(enumSchema2, categories[1][i % numOfCategories2]));
            dw.append(gr);
        }
    }
    return f;
}
Example usage of org.apache.avro.generic.GenericDatumWriter in project haivvreo (by jghoman): class AvroGenericRecordWritable, method write.
// Hadoop Writable serialization: emits the record's schema (as UTF) followed by
// the record body encoded with Avro binary encoding, so the reader side can
// reconstruct the datum without any out-of-band schema.
@Override
public void write(DataOutput out) throws IOException {
// Write schema since we need it to pull the data out. (see point #1 above)
String schemaString = record.getSchema().toString(false);
out.writeUTF(schemaString);
// Write record to byte buffer
GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
// NOTE(review): unchecked cast — this assumes Hadoop always hands us a
// DataOutputStream here; a plain DataOutput would throw ClassCastException.
// Confirm against the calling serialization framework.
BinaryEncoder be = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
gdw.setSchema(record.getSchema());
gdw.write(record, be);
// NOTE(review): no flush — presumably safe because directBinaryEncoder writes
// straight through to the stream; verify if the encoder choice ever changes.
}
Example usage of org.apache.avro.generic.GenericDatumWriter in project haivvreo (by jghoman): class TestThatEvolvedSchemasActAsWeWant, method resolvedSchemasShouldReturnReaderSchema.
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
    // Need to verify that when reading a datum with an updated reader schema
    // that the datum then returns the reader schema as its own, since we
    // depend on this behavior in order to avoid re-encoding the datum
    // in the serde.
    String v0 = "{\n" + "  \"namespace\": \"com.linkedin.haivvreo\",\n" + "  \"name\": \"SomeStuff\",\n" + "  \"type\": \"record\",\n" + "  \"fields\": [\n" + "    {\n" + "      \"name\":\"v0\",\n" + "      \"type\":\"string\"\n" + "    }\n" + "  ]\n" + "}";
    String v1 = "{\n" + "  \"namespace\": \"com.linkedin.haivvreo\",\n" + "  \"name\": \"SomeStuff\",\n" + "  \"type\": \"record\",\n" + "  \"fields\": [\n" + "    {\n" + "      \"name\":\"v0\",\n" + "      \"type\":\"string\"\n" + "    },\n" + "    {\n" + "      \"name\":\"v1\",\n" + "      \"type\":\"string\",\n" + "      \"default\":\"v1_default\"" + "    }\n" + "  ]\n" + "}";
    // Schema.parse(String) is deprecated; use Schema.Parser. Two separate parsers
    // are required because a single Parser rejects redefining the same full name.
    Schema[] schemas = { new Schema.Parser().parse(v0), new Schema.Parser().parse(v1) };
    // Encode a record against the writer (v0) schema.
    GenericRecord record = new GenericData.Record(schemas[0]);
    record.put("v0", "v0 value");
    assertTrue(GenericData.get().validate(schemas[0], record));
    // Write datum out to an in-memory stream; try-with-resources ensures the
    // writer is closed even if create/append throws.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dfw =
                 new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schemas[0]))) {
        dfw.create(schemas[0], baos);
        dfw.append(record);
    }
    // Read it back with the evolved reader (v1) schema.
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(schemas[1]);
    try (DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr)) {
        assertTrue(dfs.hasNext());
        GenericRecord next = dfs.next();
        assertEquals("v0 value", next.get("v0").toString());
        assertEquals("v1_default", next.get("v1").toString());
        // Now the most important check - when we query this record for its schema,
        // we should get back the latest, reader schema:
        assertEquals(schemas[1], next.getSchema());
    }
}
Example usage of org.apache.avro.generic.GenericDatumWriter in project databus (by LinkedIn): class DataGenerator, method printAvroJson.
/**
 * Serializes the given record to the supplied stream using Avro's JSON encoding.
 *
 * @param record the datum to serialize; its own schema drives the encoding
 * @param outs   destination stream (not closed by this method)
 * @throws IOException if encoding or writing fails
 */
private static void printAvroJson(GenericRecord record, OutputStream outs) throws IOException {
    Schema recordSchema = record.getSchema();
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(recordSchema);
    JsonEncoder encoder = new JsonEncoder(recordSchema, outs);
    writer.write(record, encoder);
    // Flush the encoder so buffered JSON output actually reaches the stream.
    encoder.flush();
}
Aggregations