use of org.apache.avro.generic.GenericDatumWriter in project pinot by linkedin.
the class BaseClusterIntegrationTest method pushAvroIntoKafka.
public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
Properties properties = new Properties();
properties.put("metadata.broker.list", kafkaBroker);
properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
properties.put("request.required.acks", "1");
ProducerConfig producerConfig = new ProducerConfig(properties);
Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
for (File avroFile : avroFiles) {
try {
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
int recordCount = 0;
List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
int messagesInThisBatch = 0;
for (GenericRecord genericRecord : reader) {
outputStream.reset();
if (header != null && 0 < header.length) {
outputStream.write(header);
}
datumWriter.write(genericRecord, binaryEncoder);
binaryEncoder.flush();
byte[] bytes = outputStream.toByteArray();
KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
if (BATCH_KAFKA_MESSAGES) {
messagesToWrite.add(data);
messagesInThisBatch++;
if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
messagesInThisBatch = 0;
producer.send(messagesToWrite);
messagesToWrite.clear();
}
} else {
producer.send(data);
}
recordCount += 1;
}
if (BATCH_KAFKA_MESSAGES) {
LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
producer.send(messagesToWrite);
}
outputStream.close();
reader.close();
LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
LOGGER.info("Total records written so far " + totalRecordCount);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
}
use of org.apache.avro.generic.GenericDatumWriter in project pinot by linkedin.
the class UploadRefreshDeleteIntegrationTest method generateAndUploadRandomSegment1.
protected void generateAndUploadRandomSegment1(final String segmentName, int rowCount) throws Exception {
ThreadLocalRandom random = ThreadLocalRandom.current();
Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
GenericRecord record = new GenericData.Record(schema);
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
final File avroFile = new File(_tmpDir, segmentName + ".avro");
fileWriter.create(schema, avroFile);
for (int i = 0; i < rowCount; i++) {
record.put(0, random.nextInt());
fileWriter.append(record);
}
fileWriter.close();
final int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
final String TAR_GZ_FILE_EXTENTION = ".tar.gz";
File segmentTarDir = new File(_tarsDir, segmentName);
buildSegment(segmentTarDir, avroFile, segmentIndex, segmentName, 0);
String segmentFileName = segmentName;
for (String name : segmentTarDir.list()) {
if (name.endsWith(TAR_GZ_FILE_EXTENTION)) {
segmentFileName = name;
}
}
File file = new File(segmentTarDir, segmentFileName);
long segmentLength = file.length();
final File segmentTarDir1 = new File(_tarsDir, segmentName);
FileUtils.deleteQuietly(segmentTarDir);
new Thread(new Runnable() {
@Override
public void run() {
try {
buildSegment(segmentTarDir1, avroFile, segmentIndex, segmentName, 5);
} catch (Exception e) {
}
}
}).start();
FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, segmentLength, 5, 5);
avroFile.delete();
FileUtils.deleteQuietly(segmentTarDir);
}
use of org.apache.avro.generic.GenericDatumWriter in project pinot by linkedin.
the class UploadRefreshDeleteIntegrationTest method generateAndUploadRandomSegment.
protected void generateAndUploadRandomSegment(String segmentName, int rowCount) throws Exception {
ThreadLocalRandom random = ThreadLocalRandom.current();
Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
GenericRecord record = new GenericData.Record(schema);
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
File avroFile = new File(_tmpDir, segmentName + ".avro");
fileWriter.create(schema, avroFile);
for (int i = 0; i < rowCount; i++) {
record.put(0, random.nextInt());
fileWriter.append(record);
}
fileWriter.close();
int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
File segmentTarDir = new File(_tarsDir, segmentName);
ensureDirectoryExistsAndIsEmpty(segmentTarDir);
ExecutorService executor = MoreExecutors.sameThreadExecutor();
buildSegmentsFromAvro(Collections.singletonList(avroFile), executor, segmentIndex, new File(_segmentsDir, segmentName), segmentTarDir, this.tableName, false, null);
executor.shutdown();
executor.awaitTermination(1L, TimeUnit.MINUTES);
for (String segmentFileName : segmentTarDir.list()) {
File file = new File(segmentTarDir, segmentFileName);
FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, file.length());
}
avroFile.delete();
FileUtils.deleteQuietly(segmentTarDir);
}
use of org.apache.avro.generic.GenericDatumWriter in project beam by apache.
the class FakeJobService method writeRowsHelper.
private void writeRowsHelper(List<TableRow> rows, Schema avroSchema, String destinationPattern, int shard) throws IOException {
String filename = destinationPattern.replace("*", String.format("%012d", shard));
try (WritableByteChannel channel = FileSystems.create(FileSystems.matchNewResource(filename, false), MimeTypes.BINARY);
DataFileWriter<GenericRecord> tableRowWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema)).create(avroSchema, Channels.newOutputStream(channel))) {
for (Map<String, Object> record : rows) {
GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
for (Map.Entry<String, Object> field : record.entrySet()) {
genericRecordBuilder.set(field.getKey(), field.getValue());
}
tableRowWriter.append(genericRecordBuilder.build());
}
} catch (IOException e) {
throw new IllegalStateException(String.format("Could not create destination for extract job %s", filename), e);
}
}
use of org.apache.avro.generic.GenericDatumWriter in project beam by apache.
the class AvroPipelineTest method populateGenericFile.
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
FileOutputStream outputStream = new FileOutputStream(this.inputFile);
GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
dataFileWriter.create(schema, outputStream);
for (GenericRecord record : genericRecords) {
dataFileWriter.append(record);
}
}
outputStream.close();
}
Aggregations