Use of org.apache.avro.file.DataFileWriter in project pinot by linkedin.
The class PregeneratedHllTest, method createAvroWithHll:
public File createAvroWithHll(File newAvroFile, String inputAvro, String column, int log2m) throws IOException {
    String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(inputAvro));
    try (DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath))) {
        // Copy the existing fields and append one new string column for the serialized HLL.
        Schema currentSchema = avroReader.getSchema();
        List<Schema.Field> fields = currentSchema.getFields();
        List<Schema.Field> newFieldList = new ArrayList<>(fields.size());
        for (Schema.Field field : fields) {
            newFieldList.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()));
        }
        final String hllColumnName = column + "_hll";
        newFieldList.add(new Schema.Field(hllColumnName, Schema.create(Schema.Type.STRING), null, null));
        Schema updatedSchema = Schema.createRecord("hllschema", "doc", this.getClass().getName(), false);
        updatedSchema.setFields(newFieldList);
        // Rewrite every record under the extended schema, filling in the pre-generated HLL value.
        try (DataFileWriter<GenericData.Record> writer = new DataFileWriter<GenericData.Record>(new GenericDatumWriter<GenericData.Record>(updatedSchema))) {
            writer.create(updatedSchema, newAvroFile);
            while (avroReader.hasNext()) {
                GenericRecord record = avroReader.next();
                GenericData.Record newRecord = new GenericData.Record(updatedSchema);
                for (Schema.Field field : fields) {
                    newRecord.put(field.name(), record.get(field.name()));
                }
                newRecord.put(hllColumnName, HllUtil.singleValueHllAsString(log2m, record.get(column)));
                writer.append(newRecord);
            }
        }
    }
    return newAvroFile;
}
Use of org.apache.avro.file.DataFileWriter in project avro-kafka-storm by ransilberman.
The class MainTest, method testDataFile:
@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // Resolve the union branches of the "subrecord" field by schema name.
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    //write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    dataFileWriter.create(schema, fileOut);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    //read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    assertThat("Schema is the same", schema, is(dataFileReader.getSchema()));
    for (GenericRecord record : dataFileReader) {
        assertThat(record.get("siteId").toString(), is("28280110"));
        assertThat(record.get("eventType").toString(), is("PLine"));
    }
    dataFileReader.close();
}
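DataFileWriter also supports per-block compression via setCodec, which must be called before create. A minimal sketch of writing the same datum with the built-in deflate codec; the codec choice and output file name are illustrative additions, not part of the original test:

// Same schema and datum as above; deflate is bundled with Avro (org.apache.avro.file.CodecFactory).
DataFileWriter<GenericRecord> deflateWriter = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
deflateWriter.setCodec(CodecFactory.deflateCodec(6)); // compression level 1-9
deflateWriter.create(schema, new File("data-deflate.avro"));
deflateWriter.append(datum);
deflateWriter.close();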
Use of org.apache.avro.file.DataFileWriter in project sling by apache.
The class AvroContentSerializer, method exportToStream:
@Override
public void exportToStream(ResourceResolver resourceResolver, DistributionExportOptions options, OutputStream outputStream) throws DistributionException {
    DatumWriter<AvroShallowResource> datumWriter = new SpecificDatumWriter<AvroShallowResource>(AvroShallowResource.class);
    DataFileWriter<AvroShallowResource> writer = new DataFileWriter<AvroShallowResource>(datumWriter);
    try {
        writer.create(schema, outputStream);
    } catch (IOException e) {
        throw new DistributionException(e);
    }
    try {
        DistributionExportFilter filter = options.getFilter();
        for (DistributionExportFilter.TreeFilter treeFilter : filter.getNodeFilters()) {
            String path = treeFilter.getPath();
            Resource resource = resourceResolver.getResource(path);
            AvroShallowResource avroShallowResource = getAvroShallowResource(treeFilter, filter.getPropertyFilter(), resource);
            writer.append(avroShallowResource);
        }
        outputStream.flush();
    } catch (Exception e) {
        throw new DistributionException(e);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            // do nothing
        }
    }
}
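Since DataFileWriter implements Closeable, the explicit finally block above could be collapsed with try-with-resources, as the pinot example at the top of this page already does. A sketch under that assumption; note that a failure in close() would then surface as a DistributionException instead of being swallowed:

try (DataFileWriter<AvroShallowResource> writer = new DataFileWriter<AvroShallowResource>(datumWriter)) {
    writer.create(schema, outputStream);
    DistributionExportFilter filter = options.getFilter();
    for (DistributionExportFilter.TreeFilter treeFilter : filter.getNodeFilters()) {
        Resource resource = resourceResolver.getResource(treeFilter.getPath());
        writer.append(getAvroShallowResource(treeFilter, filter.getPropertyFilter(), resource));
    }
    outputStream.flush();
} catch (Exception e) { // also catches exceptions thrown while closing the writer
    throw new DistributionException(e);
}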
Use of org.apache.avro.file.DataFileWriter in project samza by apache.
The class TestAvroFileHdfsReader, method writeTestEventsToFile:
public static void writeTestEventsToFile(String path, int numEvents) throws Exception {
    Schema schema = Schema.parse(TestAvroFileHdfsReader.class.getResourceAsStream("/reader/TestEvent.avsc"));
    File file = new File(path);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer);
    dataFileWriter.create(schema, file);
    for (int i = 0; i < numEvents; i++) {
        GenericRecord datum = new GenericData.Record(schema);
        datum.put(FIELD_1, i);
        datum.put(FIELD_2, "string_" + i);
        dataFileWriter.append(datum);
    }
    dataFileWriter.close();
}
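Reading the file back follows the same generic pattern as the MainTest example above. A minimal sketch reusing this class's FIELD_1/FIELD_2 constants; printTestEvents is a hypothetical helper, not part of the original test:

public static void printTestEvents(String path) throws Exception {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    // DataFileReader reads the schema from the file header, so no schema argument is needed here.
    try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(new File(path), reader)) {
        for (GenericRecord record : fileReader) {
            System.out.println(record.get(FIELD_1) + " -> " + record.get(FIELD_2));
        }
    }
}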