Use of org.apache.avro.generic.GenericRecord in project samza by apache.
The class TestMultiFileHdfsReader, method testSequentialRead:
@Test
public void testSequentialRead() throws Exception {
    SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
    // "0:0" is the starting offset: file index 0, offset 0 within that file
    MultiFileHdfsReader multiReader =
        new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
    int index = 0;
    while (multiReader.hasNext()) {
        GenericRecord record = (GenericRecord) multiReader.readNext().getMessage();
        Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
        Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
        index++;
    }
    // three files of NUM_EVENTS records each were read back to back
    Assert.assertEquals(3 * NUM_EVENTS, index);
    multiReader.close();
}
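The trailing "0:0" argument is the starting offset in the form fileIndex:offsetWithinFile. As a minimal sketch reusing the same test fixtures (ssp, descriptors, NUM_EVENTS), and assuming MultiFileHdfsReader exposes a nextOffset() accessor as the Samza HDFS consumer tests suggest, a reader could be stopped mid-stream and resumed from a checkpointed offset:

// read part of the stream, remember where we stopped, and resume from there
MultiFileHdfsReader first =
    new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
for (int i = 0; i < NUM_EVENTS / 2 && first.hasNext(); i++) {
    first.readNext();
}
String checkpoint = first.nextOffset(); // assumed API; yields "fileIndex:offsetWithinFile"
first.close();
// a new reader constructed with the checkpoint continues where the first one stopped
MultiFileHdfsReader resumed =
    new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), checkpoint);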
Use of org.apache.avro.generic.GenericRecord in project voldemort by voldemort.
The class ClientExample, method avroStoreExample:
public static void avroStoreExample() {
    System.out.println("==============Avro store example=================");
    // In a production environment, the StoreClient should be instantiated via a
    // factory pattern through a framework such as Spring
    String bootstrapUrl = "tcp://localhost:6666";
    StoreClientFactory factory = new SocketStoreClientFactory(new ClientConfig().setBootstrapUrls(bootstrapUrl));
    StoreClient<GenericRecord, GenericRecord> client = factory.getStoreClient("avro-example");
    // create the initial key-value pair
    System.out.println("Creating initial Key and Value");
    String keySchemaJson = "{ \"name\": \"key\", \"type\": \"record\", \"fields\": [{ \"name\": \"user_id\", \"type\": \"int\" }] }";
    // Schema.parse(String) is deprecated; Schema.Parser is the current API
    Schema keySchema = new Schema.Parser().parse(keySchemaJson);
    GenericRecord key = new GenericData.Record(keySchema);
    key.put("user_id", 123);
    // note: "optional" is not an Avro keyword; Avro keeps it as a custom field property
    String valueSchemaJson = "{\n"
            + "  \"name\": \"value\",\n"
            + "  \"type\": \"record\",\n"
            + "  \"fields\": [{\n"
            + "    \"name\": \"user_id\",\n"
            + "    \"type\": \"int\"\n"
            + "  }, {\n"
            + "    \"name\": \"gender\",\n"
            + "    \"type\": \"string\"\n"
            + "  }, {\n"
            + "    \"name\": \"age\",\n"
            + "    \"type\": \"int\",\n"
            + "    \"optional\": true\n"
            + "  }]\n"
            + "}";
    Schema valueSchema = new Schema.Parser().parse(valueSchemaJson);
    GenericRecord value = new GenericData.Record(valueSchema);
    value.put("user_id", 123);
    value.put("gender", "male");
    value.put("age", 23);
    // put the initial value
    System.out.println("Putting Initial value");
    client.put(key, value);
    // get the value
    System.out.println("Getting the value");
    Versioned<GenericRecord> versioned = client.get(key);
    System.out.println("Initial Versioned Object: " + String.valueOf(versioned));
    System.out.println(" Initial Value: " + String.valueOf(versioned.getValue()));
    // modify the value
    System.out.println("Modifying the value");
    GenericRecord modifiedRecord = versioned.getValue();
    modifiedRecord.put("gender", "female");
    modifiedRecord.put("age", 55);
    versioned.setObject(modifiedRecord);
    // put the updated value, reusing the version for optimistic concurrency control
    System.out.println("Putting the new value");
    client.put(key, versioned);
    // get again and print
    System.out.println("Getting the new value");
    versioned = client.get(key);
    System.out.println(" New Versioned Object: " + String.valueOf(versioned));
    System.out.println(" New Value: " + String.valueOf(versioned.getValue()));
}
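The get-modify-put sequence above can fail with an ObsoleteVersionException if another client writes the same key concurrently. A minimal sketch of the same update expressed through the client's applyUpdate method, which retries the action on version conflicts (assuming voldemort.client.UpdateAction as in the standard client API):

client.applyUpdate(new UpdateAction<GenericRecord, GenericRecord>() {
    @Override
    public void update(StoreClient<GenericRecord, GenericRecord> storeClient) {
        // re-read inside the action so each retry sees the latest version
        Versioned<GenericRecord> v = storeClient.get(key);
        v.getValue().put("age", 56);
        storeClient.put(key, v);
    }
});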
Use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
The class HiveExploreServiceStreamTest, method createAvroEvent:
private byte[] createAvroEvent(org.apache.avro.Schema schema, Object... values) throws IOException {
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    // assign the varargs positionally to the schema's fields
    int i = 0;
    for (org.apache.avro.Schema.Field field : schema.getFields()) {
        builder.set(field.name(), values[i]);
        i++;
    }
    GenericRecord record = builder.build();
    // serialize the record as a raw Avro binary datum (no file container)
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    writer.write(record, encoder);
    encoder.flush();
    out.close();
    return out.toByteArray();
}
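A test that produces such events typically also needs to decode them. A minimal counterpart sketch using the standard Avro decoder API (the method name parseAvroEvent is illustrative, not from the original test):

private GenericRecord parseAvroEvent(org.apache.avro.Schema schema, byte[] bytes) throws IOException {
    // mirror image of createAvroEvent: a raw binary datum, so the writer schema must be supplied
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    return reader.read(null, decoder);
}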
Use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
The class FileWriterHelper, method generateAvroFile:
/**
 * Generate an Avro file of schema (key String, value String) containing the records ("<prefix>i", "#i")
 * for start <= i < end. The file is written using the passed-in output stream.
 */
public static void generateAvroFile(OutputStream out, String prefix, int start, int end) throws IOException {
    // ad-hoc record schema with two string fields: key and value
    Schema schema = Schema.createRecord("kv", null, null, false);
    schema.setFields(ImmutableList.of(
        new Schema.Field("key", Schema.create(Schema.Type.STRING), null, null),
        new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null)));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    try {
        // create() inside the try so the finally block runs even if it throws
        dataFileWriter.create(schema, out);
        for (int i = start; i < end; i++) {
            GenericRecord kv = new GenericData.Record(schema);
            kv.put("key", prefix + i);
            kv.put("value", "#" + i);
            dataFileWriter.append(kv);
        }
    } finally {
        // closing the DataFileWriter flushes it; close the underlying stream defensively as well
        Closeables.closeQuietly(dataFileWriter);
        Closeables.closeQuietly(out);
    }
}
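Because DataFileWriter produces a standard Avro container file, the schema is embedded in the file itself, so reading it back requires no schema argument. A minimal sketch of a matching reader using the standard Avro API (readAvroFile is a hypothetical helper, not part of FileWriterHelper):

public static List<String> readAvroFile(InputStream in) throws IOException {
    List<String> keys = new ArrayList<>();
    // the container file carries its own schema, so none is passed to the reader
    try (DataFileStream<GenericRecord> stream = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
        for (GenericRecord record : stream) {
            keys.add(record.get("key").toString());
        }
    }
    return keys;
}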
Use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
The class DynamicPartitionerWithAvroTest, method groupByPartitionKey:
private Multimap<PartitionKey, GenericRecord> groupByPartitionKey(List<? extends GenericRecord> records, long now) {
    HashMultimap<PartitionKey, GenericRecord> groupedByPartitionKey = HashMultimap.create();
    for (GenericRecord record : records) {
        // the partition key combines the write time with the record's zip code
        PartitionKey key = PartitionKey.builder()
            .addLongField("time", now)
            .addIntField("zip", (int) record.get("zip"))
            .build();
        groupedByPartitionKey.put(key, record);
    }
    return groupedByPartitionKey;
}