Search in sources :

Example 6 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project h2o-3 by h2oai.

the class ParquetFileGenerator method generateAvroPrimitiveTypes.

/**
 * Writes {@code nrows} generic Avro records, built against the schema loaded from the
 * {@code PrimitiveAvro.avsc} resource, into a Parquet file.
 *
 * <p>Most field values of row {@code i} are derived from {@code i}; its {@code mydate}
 * field is {@code date} moved back by {@code i} hours and rendered as text.
 *
 * @param parentDir directory the file is created in
 * @param filename  name of the file inside {@code parentDir}
 * @param nrows     number of records to write
 * @param date      timestamp used for row 0
 * @return the {@link File} handle of the written Parquet file
 * @throws IOException if reading the schema resource or writing the file fails
 */
static File generateAvroPrimitiveTypes(File parentDir, String filename, int nrows, Date date) throws IOException {
    // Kept as a long so the per-row offset below is computed in 64-bit arithmetic.
    final long millisPerHour = 3600L * 1000L;
    File f = new File(parentDir, filename);
    Schema schema = new Schema.Parser().parse(Resources.getResource("PrimitiveAvro.avsc").openStream());
    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(new Path(f.getPath()), schema);
    try {
        DateFormat format = new SimpleDateFormat("yy-MMM-dd:hh.mm.ss.SSS aaa");
        for (int i = 0; i < nrows; i++) {
            // The previous int expression (i * 1000 * 3600) overflowed from i = 597 onwards,
            // which silently produced wrong dates for larger row counts.
            String formattedDate = format.format(new Date(date.getTime() - (i * millisPerHour)));
            GenericData.Record record = new GenericRecordBuilder(schema)
                .set("mynull", null)
                .set("myboolean", i % 2 == 0)
                .set("myint", 1 + i)
                .set("mylong", 2L + i)
                .set("myfloat", 3.1f + i)
                .set("mydouble", 4.1 + i)
                .set("mydate", formattedDate)
                .set("myuuid", UUID.randomUUID())
                .set("mystring", "hello world: " + i)
                .set("myenum", i % 2 == 0 ? "a" : "b")
                .build();
            writer.write(record);
        }
    } finally {
        // Always release the writer, even when building or writing a record fails.
        writer.close();
    }
    return f;
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) AvroParquetWriter(org.apache.parquet.avro.AvroParquetWriter) GenericData(org.apache.avro.generic.GenericData) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 7 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.

the class LoggingServlet method doGet.

/**
 * Renders a small HTML page that echoes the {@code message} request parameter and, when
 * the parameter is present, logs it as an Avro record with a freshly incremented id.
 *
 * @param request  the incoming request; its {@code message} parameter is optional
 * @param response the response the HTML page is written to
 * @throws ServletException declared by the overridden servlet method
 * @throws IOException      if the response writer cannot be obtained
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    response.setContentType("text/html");
    PrintWriter pw = response.getWriter();
    pw.println("<html>");
    // The head element was previously closed with a second </title> tag.
    pw.println("<head><title>CDK Example</title></head>");
    pw.println("<body>");
    String message = request.getParameter("message");
    if (message == null) {
        pw.println("<p>No message specified.</p>");
    } else {
        // The parameter is untrusted input: escape it before embedding it in markup.
        pw.println("<p>Message: " + escapeHtml(message) + "</p>");
        // The logged event keeps the raw, unescaped message.
        GenericData.Record event = new GenericRecordBuilder(schema).set("id", id.incrementAndGet()).set("message", message).build();
        logger.info(event);
    }
    pw.println("<p><a href=\"/logging-webapp\">Home</a></p>");
    pw.println("</body></html>");
}

/** Replaces the characters that are significant in HTML with their entity references. */
private static String escapeHtml(String text) {
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Also used : GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericData(org.apache.avro.generic.GenericData) PrintWriter(java.io.PrintWriter)

Example 8 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.

the class App method run.

/**
 * Builds ten generic Avro events against the schema of the {@code events} dataset and
 * sends each one to log4j, echoing it on standard output first.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from the repository lookup or from logging
 */
@Override
public int run(String[] args) throws Exception {
    // log4j logger the generated events are sent through
    final Logger log4j = Logger.getLogger(App.class);
    // Look up the "events" dataset in the repository and take its Avro schema
    final Schema eventSchema = DatasetRepositories.open("repo:hdfs:/tmp/data")
        .load("events")
        .getDescriptor()
        .getSchema();
    // A single builder is reused; both fields are overwritten for every event
    final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(eventSchema);
    long eventId = 0;
    while (eventId < 10) {
        recordBuilder.set("id", eventId);
        recordBuilder.set("message", "Hello " + eventId);
        final GenericRecord event = recordBuilder.build();
        System.out.println("Sending to log4j: " + event);
        log4j.info(event);
        eventId++;
    }
    return 0;
}
Also used : DatasetRepository(com.cloudera.cdk.data.DatasetRepository) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Logger(org.apache.log4j.Logger) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 9 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.

the class GenerateSimpleLogs method run.

/**
 * Writes 15001 randomly chosen log records to the {@code logs-staging} dataset.
 *
 * <p>Record timestamps start at the current time minus {@code DAY_IN_MILLIS} and advance
 * in 5-second steps. Level and message are picked together by one random index into
 * {@code LOG_LEVELS} / {@code LOG_MESSAGES}.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from the repository or the dataset writer
 */
@Override
public int run(String[] args) throws Exception {
    // going to generate a lot of random log messages
    final Random rand = new Random();
    // open the repository
    final DatasetRepository repo = DatasetRepositories.open("repo:file:/tmp/data");
    // data is written to the staging dataset
    final Dataset<GenericRecord> staging = repo.load("logs-staging");
    final DatasetWriter<GenericRecord> writer = staging.newWriter();
    // this is going to build our simple log records
    final GenericRecordBuilder builder = new GenericRecordBuilder(staging.getDescriptor().getSchema());
    // timestamps start DAY_IN_MILLIS before now and are spaced 5 seconds apart
    final Calendar now = Calendar.getInstance();
    final long yesterday = now.getTimeInMillis() - DAY_IN_MILLIS;
    try {
        writer.open();
        // 15001 messages, 5 seconds apart: roughly 20.8 hours worth of messages
        for (int second = 0; second <= 15000; second++) {
            LOG.info("Generating log message " + second);
            builder.set("timestamp", yesterday + second * 5000L);
            builder.set("component", "GenerateSimpleLogs");
            int level = rand.nextInt(LOG_LEVELS.length);
            builder.set("level", LOG_LEVELS[level]);
            builder.set("message", LOG_MESSAGES[level]);
            writer.write(builder.build());
        }
    } finally {
        // A failing flush must not prevent the writer from being closed.
        try {
            writer.flush();
        } finally {
            writer.close();
        }
    }
    return 0;
}
Also used : Random(java.util.Random) DatasetRepository(com.cloudera.cdk.data.DatasetRepository) Calendar(java.util.Calendar) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 10 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdk-examples by cloudera.

the class StagingToPersistentSerial method getPartitionKey.

/**
 * Derives the partition key for the given timestamp by running a placeholder record
 * through the dataset's partition strategy.
 *
 * @param data      dataset whose schema and partition strategy are used
 * @param timestamp value stored in the placeholder record's {@code timestamp} field
 * @return the key the partition strategy produces for the placeholder record
 */
@SuppressWarnings("deprecation")
private static PartitionKey getPartitionKey(Dataset data, long timestamp) {
    // The strategy produces keys from records, so populate a placeholder record;
    // only the timestamp comes from the caller, the other fields are fixed values.
    final GenericRecordBuilder fakeRecord = new GenericRecordBuilder(data.getDescriptor().getSchema())
        .set("timestamp", timestamp)
        .set("level", "INFO")
        .set("component", "StagingToPersistentSerial")
        .set("message", "Fake log message");
    // Ask the dataset's own partition strategy for the key of that record
    final PartitionStrategy strategy = data.getDescriptor().getPartitionStrategy();
    return strategy.partitionKeyForEntity(fakeRecord.build());
}
Also used : GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) PartitionStrategy(com.cloudera.cdk.data.PartitionStrategy)

Aggregations

GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)17 GenericRecord (org.apache.avro.generic.GenericRecord)14 DatasetRepository (com.cloudera.cdk.data.DatasetRepository)6 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)5 Schema (co.cask.cdap.api.data.schema.Schema)5 Random (java.util.Random)5 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)4 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)4 DatasetDescriptor (com.cloudera.cdk.data.DatasetDescriptor)4 Test (org.junit.Test)4 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)3 PartitionStrategy (com.cloudera.cdk.data.PartitionStrategy)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 IOException (java.io.IOException)2 Schema (org.apache.avro.Schema)2 GenericData (org.apache.avro.generic.GenericData)2 BinaryEncoder (org.apache.avro.io.BinaryEncoder)2 KeyValue (co.cask.cdap.api.dataset.lib.KeyValue)1 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1