Use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.
The class SpecificAvroGroupByTest, method createPersonAvroFile.
private void createPersonAvroFile(File avroFile) throws IOException {
  // Build a single Person record with two sibling names.
  Person.Builder person = Person.newBuilder();
  person.setAge(40);
  person.setName("Bob");
  List<CharSequence> siblingNames = Lists.newArrayList();
  siblingNames.add("Bob1");
  siblingNames.add("Bob2");
  person.setSiblingnames(siblingNames);
  // Write the record as an Avro container file using the generated schema.
  FileOutputStream outputStream = new FileOutputStream(avroFile);
  SpecificDatumWriter<Person> writer = new SpecificDatumWriter<Person>(Person.class);
  DataFileWriter<Person> dataFileWriter = new DataFileWriter<Person>(writer);
  dataFileWriter.create(Person.SCHEMA$, outputStream);
  dataFileWriter.append(person.build());
  dataFileWriter.close();
  outputStream.close();
}
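To read the file back, the complementary pattern uses DataFileReader with a SpecificDatumReader. A minimal sketch (readPersonAvroFile is a hypothetical helper; Person is the same generated class as above):

private void readPersonAvroFile(File avroFile) throws IOException {
  SpecificDatumReader<Person> reader = new SpecificDatumReader<Person>(Person.class);
  DataFileReader<Person> dataFileReader = new DataFileReader<Person>(avroFile, reader);
  try {
    while (dataFileReader.hasNext()) {
      // next() deserializes one record per call.
      Person person = dataFileReader.next();
      System.out.println(person.getName() + ", age " + person.getAge());
    }
  } finally {
    dataFileReader.close();
  }
}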
Use of org.apache.avro.file.DataFileWriter in project pinot by linkedin.
The class UploadRefreshDeleteIntegrationTest, method generateAndUploadRandomSegment1.
protected void generateAndUploadRandomSegment1(final String segmentName, int rowCount) throws Exception {
  ThreadLocalRandom random = ThreadLocalRandom.current();
  Schema schema = new Schema.Parser().parse(
      new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
  GenericRecord record = new GenericData.Record(schema);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  // Generate an Avro file with rowCount random int rows.
  final File avroFile = new File(_tmpDir, segmentName + ".avro");
  fileWriter.create(schema, avroFile);
  for (int i = 0; i < rowCount; i++) {
    record.put(0, random.nextInt());
    fileWriter.append(record);
  }
  fileWriter.close();
  final int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
  final String TAR_GZ_FILE_EXTENSION = ".tar.gz";
  File segmentTarDir = new File(_tarsDir, segmentName);
  buildSegment(segmentTarDir, avroFile, segmentIndex, segmentName, 0);
  // Locate the built .tar.gz segment file.
  String segmentFileName = segmentName;
  for (String name : segmentTarDir.list()) {
    if (name.endsWith(TAR_GZ_FILE_EXTENSION)) {
      segmentFileName = name;
    }
  }
  File file = new File(segmentTarDir, segmentFileName);
  long segmentLength = file.length();
  // Delete the built segment, then rebuild it (with 5 extra rows) on a background
  // thread while the upload below runs. segmentTarDir1 is a final copy of the
  // same path so the anonymous Runnable can capture it.
  final File segmentTarDir1 = new File(_tarsDir, segmentName);
  FileUtils.deleteQuietly(segmentTarDir);
  new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        buildSegment(segmentTarDir1, avroFile, segmentIndex, segmentName, 5);
      } catch (Exception e) {
        // Ignored: a failed background rebuild only affects this race scenario.
      }
    }
  }).start();
  FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, segmentLength, 5, 5);
  avroFile.delete();
  FileUtils.deleteQuietly(segmentTarDir);
}
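DataFileWriter implements Closeable, so on Java 7+ the create/append/close sequence above can be wrapped in try-with-resources. A minimal sketch of the same generation loop, assuming schema, avroFile and rowCount are defined as above:

try (DataFileWriter<GenericRecord> writer =
    new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema))) {
  writer.create(schema, avroFile);
  GenericRecord record = new GenericData.Record(schema);
  for (int i = 0; i < rowCount; i++) {
    record.put(0, ThreadLocalRandom.current().nextInt());
    writer.append(record);
  }
} // close() runs automatically, flushing the final block to disk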
Use of org.apache.avro.file.DataFileWriter in project pinot by linkedin.
The class UploadRefreshDeleteIntegrationTest, method generateAndUploadRandomSegment.
protected void generateAndUploadRandomSegment(String segmentName, int rowCount) throws Exception {
  ThreadLocalRandom random = ThreadLocalRandom.current();
  Schema schema = new Schema.Parser().parse(
      new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
  GenericRecord record = new GenericData.Record(schema);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  // Generate an Avro file with rowCount random int rows.
  File avroFile = new File(_tmpDir, segmentName + ".avro");
  fileWriter.create(schema, avroFile);
  for (int i = 0; i < rowCount; i++) {
    record.put(0, random.nextInt());
    fileWriter.append(record);
  }
  fileWriter.close();
  int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
  File segmentTarDir = new File(_tarsDir, segmentName);
  ensureDirectoryExistsAndIsEmpty(segmentTarDir);
  // Build the segment synchronously, then upload every file it produced.
  ExecutorService executor = MoreExecutors.sameThreadExecutor();
  buildSegmentsFromAvro(Collections.singletonList(avroFile), executor, segmentIndex,
      new File(_segmentsDir, segmentName), segmentTarDir, this.tableName, false, null);
  executor.shutdown();
  executor.awaitTermination(1L, TimeUnit.MINUTES);
  for (String segmentFileName : segmentTarDir.list()) {
    File file = new File(segmentTarDir, segmentFileName);
    FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, file.length());
  }
  avroFile.delete();
  FileUtils.deleteQuietly(segmentTarDir);
}
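Both pinot methods load dummy.avsc from test resources and only populate field 0 with an int, so any schema whose first field is int-compatible would satisfy them. A hypothetical inline equivalent, for illustration only:

// Hypothetical stand-in for dummy.avsc: a record whose first field accepts
// the random ints written above. The actual test resource may differ.
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Dummy\","
    + "\"fields\":[{\"name\":\"value\",\"type\":\"int\"}]}");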
Use of org.apache.avro.file.DataFileWriter in project druid by druid-io.
The class AvroHadoopInputRowParserTest, method buildPigAvro.
private static GenericRecord buildPigAvro(GenericRecord datum, String inputStorage, String outputStorage) throws IOException {
  final File tmpDir = Files.createTempDir();
  FileReader<GenericRecord> reader = null;
  PigServer pigServer = null;
  try {
    // 0. write avro object into temp file.
    File someAvroDatumFile = new File(tmpDir, "someAvroDatum.avro");
    DataFileWriter<GenericRecord> dataFileWriter =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
    dataFileWriter.create(SomeAvroDatum.getClassSchema(), someAvroDatumFile);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    // 1. read avro files into Pig
    pigServer = new PigServer(ExecType.LOCAL);
    pigServer.registerQuery(String.format("A = LOAD '%s' USING %s;", someAvroDatumFile, inputStorage));
    // 2. write new avro file using AvroStorage
    File outputDir = new File(tmpDir, "output");
    pigServer.store("A", String.valueOf(outputDir), outputStorage);
    // 3. read avro object from AvroStorage
    reader = DataFileReader.openReader(new File(outputDir, "part-m-00000.avro"),
        new GenericDatumReader<GenericRecord>());
    return reader.next();
  } finally {
    if (pigServer != null) {
      pigServer.shutdown();
    }
    Closeables.close(reader, true);
    FileUtils.deleteDirectory(tmpDir);
  }
}
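DataFileReader.openReader returns a FileReader positioned at the first record, and the test only consumes that single datum. Since FileReader extends Iterator, a sketch that drains the whole Pig output instead would look like:

// Sketch: iterate every record in the Pig output rather than just the first.
FileReader<GenericRecord> all = DataFileReader.openReader(
    new File(outputDir, "part-m-00000.avro"), new GenericDatumReader<GenericRecord>());
try {
  while (all.hasNext()) {
    System.out.println(all.next());
  }
} finally {
  all.close();
}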
Use of org.apache.avro.file.DataFileWriter in project crunch by cloudera.
The class AvroOutputFormat, method getRecordWriter.
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  // Named outputs carry their own schema; otherwise fall back to the job's output schema.
  Schema schema = null;
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
  } else {
    schema = AvroJob.getOutputSchema(conf);
  }
  ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
  final DataFileWriter<T> writer = new DataFileWriter<T>(factory.<T>getWriter());
  Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(conf).create(path));
  // The RecordWriter simply appends each wrapped datum to the container file.
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.append(wrapper.datum());
    }
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      writer.close();
    }
  };
}
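The create-over-FileSystem-stream pattern in getRecordWriter is the general way to write Avro container files to HDFS. A stripped-down, framework-free sketch of the same idea, where schema and records are assumed inputs and the output path is hypothetical:

// Standalone sketch: write an Avro container file to a Hadoop FileSystem.
Configuration conf = new Configuration();
Path path = new Path("/tmp/out.avro"); // hypothetical path
FileSystem fs = path.getFileSystem(conf);
DataFileWriter<GenericRecord> writer =
    new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
writer.create(schema, fs.create(path));
try {
  for (GenericRecord rec : records) {
    writer.append(rec);
  }
} finally {
  writer.close(); // also closes the underlying FSDataOutputStream
}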