use of org.apache.avro.generic.GenericRecordBuilder in project h2o-3 by h2oai.
the class ParquetFileGenerator method generateAvroPrimitiveTypes.
/**
 * Writes a Parquet file of {@code nrows} generated Avro records using the schema loaded
 * from the {@code PrimitiveAvro.avsc} resource.
 *
 * <p>Row {@code i} carries values derived from {@code i}; its {@code mydate} field is
 * {@code date} shifted back by {@code i} hours and formatted as text.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the file to create inside {@code parentDir}
 * @param nrows     number of records to write
 * @param date      timestamp used for row 0; each following row is one hour earlier
 * @return the file that was written
 * @throws IOException if the schema resource cannot be read or the file cannot be written
 */
static File generateAvroPrimitiveTypes(File parentDir, String filename, int nrows, Date date) throws IOException {
  // Kept as a long so that the per-row offset below is computed in 64-bit arithmetic;
  // the previous int expression (i * 1000 * 3600) overflowed once i reached 597.
  final long millisPerHour = 3600L * 1000L;
  File f = new File(parentDir, filename);
  Schema schema;
  // Schema.Parser.parse(InputStream) does not close the stream, so close it here.
  java.io.InputStream schemaStream = Resources.getResource("PrimitiveAvro.avsc").openStream();
  try {
    schema = new Schema.Parser().parse(schemaStream);
  } finally {
    schemaStream.close();
  }
  AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(new Path(f.getPath()), schema);
  try {
    DateFormat format = new SimpleDateFormat("yy-MMM-dd:hh.mm.ss.SSS aaa");
    for (int i = 0; i < nrows; i++) {
      Date rowDate = new Date(date.getTime() - i * millisPerHour);
      GenericData.Record record = new GenericRecordBuilder(schema)
          .set("mynull", null)
          .set("myboolean", i % 2 == 0)
          .set("myint", 1 + i)
          .set("mylong", 2L + i)
          .set("myfloat", 3.1f + i)
          .set("mydouble", 4.1 + i)
          .set("mydate", format.format(rowDate))
          .set("myuuid", UUID.randomUUID())
          .set("mystring", "hello world: " + i)
          .set("myenum", i % 2 == 0 ? "a" : "b")
          .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return f;
}
use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
the class LoggingServlet method doGet.
/**
 * Renders a small HTML page that echoes the {@code message} request parameter and, when a
 * message is present, logs it as an Avro record with a freshly incremented id.
 *
 * @param request  the incoming request; its {@code message} parameter is optional
 * @param response the response the HTML page is written to
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the response writer cannot be obtained
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
  response.setContentType("text/html");
  PrintWriter pw = response.getWriter();
  pw.println("<html>");
  pw.println("<head><title>CDK Example</title></head>");
  pw.println("<body>");
  String message = request.getParameter("message");
  if (message == null) {
    pw.println("<p>No message specified.</p>");
  } else {
    // The parameter is caller-controlled: escape it before embedding it in markup.
    pw.println("<p>Message: " + escapeHtml(message) + "</p>");
    // The logged event keeps the raw, unescaped message text.
    GenericData.Record event = new GenericRecordBuilder(schema).set("id", id.incrementAndGet()).set("message", message).build();
    logger.info(event);
  }
  pw.println("<p><a href=\"/logging-webapp\">Home</a></p>");
  pw.println("</body></html>");
}

/** Replaces the characters that are significant in HTML text and attributes with entities. */
private static String escapeHtml(String text) {
  StringBuilder escaped = new StringBuilder(text.length() + 16);
  for (int i = 0; i < text.length(); i++) {
    char c = text.charAt(i);
    switch (c) {
      case '&':
        escaped.append("&amp;");
        break;
      case '<':
        escaped.append("&lt;");
        break;
      case '>':
        escaped.append("&gt;");
        break;
      case '"':
        escaped.append("&quot;");
        break;
      case '\'':
        escaped.append("&#39;");
        break;
      default:
        escaped.append(c);
    }
  }
  return escaped.toString();
}
use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
the class App method run.
/**
 * Loads the schema of the "events" dataset from the repository at
 * {@code repo:hdfs:/tmp/data}, builds ten generic Avro records with ids 0 through 9,
 * and sends each one to log4j after echoing it on standard output.
 *
 * @param args command-line arguments (not used)
 * @return always 0
 */
@Override
public int run(String[] args) throws Exception {
  // log4j logger that receives the generated events
  final Logger log4j = Logger.getLogger(App.class);
  // Resolve the dataset schema through the repository
  final DatasetRepository repository = DatasetRepositories.open("repo:hdfs:/tmp/data");
  final Schema eventSchema = repository.load("events").getDescriptor().getSchema();
  // One builder is reused for every record
  final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(eventSchema);
  long eventId = 0;
  while (eventId < 10) {
    recordBuilder.set("id", eventId);
    recordBuilder.set("message", "Hello " + eventId);
    final GenericRecord record = recordBuilder.build();
    System.out.println("Sending to log4j: " + record);
    log4j.info(record);
    eventId++;
  }
  return 0;
}
use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
the class GenerateSimpleLogs method run.
/**
 * Writes 15001 generated log records to the "logs-staging" dataset of the repository at
 * {@code repo:file:/tmp/data}.
 *
 * <p>Timestamps start one {@code DAY_IN_MILLIS} before the current time and advance by
 * 5000 ms per record; level and message are picked at random from {@code LOG_LEVELS}
 * and {@code LOG_MESSAGES} using the same index.
 *
 * @param args command-line arguments (not used)
 * @return always 0
 */
@Override
public int run(String[] args) throws Exception {
  // source of the random level/message choice
  final Random rand = new Random();
  // open the repository
  final DatasetRepository repo = DatasetRepositories.open("repo:file:/tmp/data");
  // data is written to the staging dataset
  final Dataset<GenericRecord> staging = repo.load("logs-staging");
  final DatasetWriter<GenericRecord> writer = staging.newWriter();
  // this is going to build our simple log records
  final GenericRecordBuilder builder = new GenericRecordBuilder(staging.getDescriptor().getSchema());
  // timestamps are spaced 5 seconds apart, starting one day before now
  final Calendar now = Calendar.getInstance();
  final long yesterday = now.getTimeInMillis() - DAY_IN_MILLIS;
  try {
    writer.open();
    // indices 0..15000 inclusive at 5-second spacing span a little under 21 hours
    for (int index = 0; index <= 15000; index++) {
      LOG.info("Generating log message " + index);
      builder.set("timestamp", yesterday + index * 5000L);
      builder.set("component", "GenerateSimpleLogs");
      int level = rand.nextInt(LOG_LEVELS.length);
      builder.set("level", LOG_LEVELS[level]);
      builder.set("message", LOG_MESSAGES[level]);
      writer.write(builder.build());
    }
  } finally {
    // A failing flush must not prevent the writer from being closed.
    try {
      writer.flush();
    } finally {
      writer.close();
    }
  }
  return 0;
}
use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.
the class StagingToPersistentSerial method getPartitionKey.
/**
 * Produces the partition key for the given timestamp by building a placeholder log
 * record from the dataset's schema and passing it to the dataset's partition strategy.
 *
 * @param data      dataset whose descriptor supplies the schema and partition strategy
 * @param timestamp value stored in the placeholder record's "timestamp" field
 * @return the key the partition strategy derives from the placeholder record
 */
@SuppressWarnings("deprecation")
private static PartitionKey getPartitionKey(Dataset data, long timestamp) {
  // The strategy derives keys from records, so populate a throwaway record first.
  final GenericRecordBuilder placeholder = new GenericRecordBuilder(data.getDescriptor().getSchema())
      .set("timestamp", timestamp)
      .set("level", "INFO")
      .set("component", "StagingToPersistentSerial")
      .set("message", "Fake log message");
  return data.getDescriptor().getPartitionStrategy().partitionKeyForEntity(placeholder.build());
}
Aggregations