use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
the class DynamicPartitionerWithAvroTest method runDynamicPartitionerMapReduce.
/**
 * Runs the dynamic-partitioning MapReduce over the given records and verifies the result.
 *
 * <p>The records are written (as strings) to the input key-value table, the program is run with
 * the output partition time and the concurrency flag passed as runtime arguments, and the
 * program's status is compared to {@code expectedStatus}. When a failure is expected, no further
 * checks are made. Otherwise the data notification, the partition keys, the partition metadata,
 * the partition locations and the records written to each partition are all verified.
 *
 * @param records the records to feed to the MapReduce program
 * @param allowConcurrentWriters value passed for the dynamic partitioner's allow-concurrency argument
 * @param expectedStatus the status the program run is expected to return
 * @throws Exception if deploying, running or verifying the program fails
 */
private void runDynamicPartitionerMapReduce(final List<? extends GenericRecord> records, boolean allowConcurrentWriters, boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);

  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);

  // populate the input kvTable with the string form of every record
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  TransactionExecutor.Subroutine writeInput = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the keys carry no meaning; they only have to be unique
      int index = 0;
      for (GenericRecord record : records) {
        kvTable.write(Integer.toString(index), record.toString());
        index++;
      }
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(writeInput);

  // runtime arguments: the output partition time and whether concurrent writers are allowed
  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
  ImmutableMap<String, String> arguments = ImmutableMap.of(
    OUTPUT_PARTITION_KEY, Long.toString(now),
    allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));

  // captured before the run so that only notifications from this run are looked up
  long startTime = System.currentTimeMillis();
  boolean status = runProgram(app,
                              AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                              new BasicArguments(arguments));
  Assert.assertEquals(expectedStatus, status);
  if (!expectedStatus) {
    // an expected failure leaves no output worth inspecting
    return;
  }

  // exactly one notification, and it must refer to the output dataset
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));

  // the run should have created partitions in the pfs
  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  TransactionExecutor.Subroutine verifyOutput = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws IOException {
      Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
      for (PartitionDetail detail : pfs.getPartitions(null)) {
        partitions.put(detail.getPartitionKey(), detail);
        // every output partition must carry the metadata written by the mapreduce
        Assert.assertEquals(AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.METADATA,
                            detail.getMetadata().asMap());
      }
      Assert.assertEquals(3, partitions.size());
      Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());

      // relative path is "<partition time>/<zip>", and location = pfs baseLocation + relativePath
      for (Map.Entry<PartitionKey, PartitionDetail> entry : partitions.entrySet()) {
        PartitionKey key = entry.getKey();
        PartitionDetail detail = entry.getValue();
        String relativePath = detail.getRelativePath();
        int zip = (int) key.getField("zip");
        Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
        Assert.assertEquals(pfsBaseLocation.append(relativePath), detail.getLocation());
      }

      // each partition must contain exactly the records grouped under its key
      for (Map.Entry<PartitionKey, Collection<GenericRecord>> entry : keyToRecordsMap.asMap().entrySet()) {
        Set<GenericRecord> expectedRecords = new HashSet<>(entry.getValue());
        PartitionDetail detail = partitions.get(entry.getKey());
        Assert.assertEquals(expectedRecords, readOutput(detail.getLocation()));
      }
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(verifyOutput);
}
use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
the class AvroRecordFormatTest method toStreamEvent.
/**
 * Serializes the given Avro record with a binary encoder and wraps the bytes in a {@link StreamEvent}.
 *
 * @param record the record to serialize as the event body
 * @param writeSchema if true, the record's schema string and the MD5 hash of that string are
 *                    added as event headers; otherwise the headers are empty
 * @return a stream event whose body is the binary-encoded record
 * @throws IOException if writing or flushing the encoded record fails
 */
private StreamEvent toStreamEvent(GenericRecord record, boolean writeSchema) throws IOException {
  org.apache.avro.Schema avroSchema = record.getSchema();

  // binary-encode the record into an in-memory buffer
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
  new GenericDatumWriter<GenericRecord>(avroSchema).write(record, binaryEncoder);
  binaryEncoder.flush();
  buffer.close();

  // optionally advertise the schema and its hash through the event headers
  Map<String, String> eventHeaders = Maps.newHashMap();
  if (writeSchema) {
    String schemaJson = avroSchema.toString();
    eventHeaders.put(AvroRecordFormat.SCHEMA, schemaJson);
    eventHeaders.put(AvroRecordFormat.SCHEMA_HASH,
                     Hashing.md5().hashString(schemaJson, Charsets.UTF_8).toString());
  }

  return new StreamEvent(eventHeaders, ByteBuffer.wrap(buffer.toByteArray()));
}
use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
the class AvroRecordFormatTest method testFlatRecord.
/**
 * Round-trips a flat record through the Avro record format: a record with one field of each
 * simple type, an array, a map and two nullable strings is encoded as a stream event, read back
 * as a {@link StructuredRecord}, and every field is compared with the value that was written.
 */
@Test
public void testFlatRecord() throws Exception {
  Schema schema = Schema.recordOf(
    "record",
    Schema.Field.of("int", Schema.of(Schema.Type.INT)),
    Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
    Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
    Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
    Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
    Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
    Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
    Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))));
  FormatSpecification formatSpecification =
    new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());

  // build the Avro record that will be written as the event body
  org.apache.avro.Schema avroSchema = convertSchema(schema);
  GenericRecord record = new GenericRecordBuilder(avroSchema)
    .set("int", Integer.MAX_VALUE)
    .set("long", Long.MAX_VALUE)
    .set("boolean", false)
    .set("bytes", Charsets.UTF_8.encode("hello world"))
    .set("double", Double.MAX_VALUE)
    .set("float", Float.MAX_VALUE)
    .set("string", "foo bar")
    .set("array", Lists.newArrayList(1, 2, 3))
    .set("map", ImmutableMap.of("k1", 1, "k2", 2))
    .set("nullable", null)
    .set("nullable2", "Hello")
    .build();

  // read the event back through the format under test
  RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpecification);
  StructuredRecord actual = format.read(toStreamEvent(record));

  // every field must come back with the value that was written
  Assert.assertEquals(Integer.MAX_VALUE, actual.get("int"));
  Assert.assertEquals(Long.MAX_VALUE, actual.get("long"));
  Assert.assertFalse((Boolean) actual.get("boolean"));
  Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) actual.get("bytes")));
  Assert.assertEquals(Double.MAX_VALUE, actual.get("double"));
  Assert.assertEquals(Float.MAX_VALUE, actual.get("float"));
  Assert.assertEquals("foo bar", actual.get("string"));
  Assert.assertEquals(Lists.newArrayList(1, 2, 3), actual.get("array"));
  assertMapEquals(ImmutableMap.<String, Object>of("k1", 1, "k2", 2), (Map<Object, Object>) actual.get("map"));
  Assert.assertNull(actual.get("nullable"));
  Assert.assertEquals("Hello", actual.get("nullable2"));
}
use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
the class MapReduceStreamInputTestRun method createEvent.
/**
 * Builds a record with the given ticker, trade count and price, and returns its Avro binary encoding.
 *
 * @param schema the Avro schema of the record; it must define the fields
 *               {@code ticker}, {@code num_traded} and {@code price}
 * @param ticker value for the {@code ticker} field
 * @param count value for the {@code num_traded} field
 * @param price value for the {@code price} field
 * @return the binary-encoded record
 * @throws IOException if writing or flushing the encoded record fails
 */
private byte[] createEvent(Schema schema, String ticker, int count, float price) throws IOException {
  GenericRecordBuilder builder = new GenericRecordBuilder(schema);
  builder.set("ticker", ticker);
  builder.set("num_traded", count);
  builder.set("price", price);
  GenericRecord trade = builder.build();

  // binary-encode the record into an in-memory buffer
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
  new GenericDatumWriter<GenericRecord>(schema).write(trade, binaryEncoder);
  binaryEncoder.flush();
  buffer.close();
  return buffer.toByteArray();
}
use of org.apache.avro.generic.GenericRecord in project cdap by caskdata.
the class ClientMessagingService method encodeRollbackDetail.
/**
 * Encodes the given {@link RollbackDetail} as expected by the rollback call. This method is rarely used
 * as the call to {@link #rollback(TopicId, RollbackDetail)} expects a {@link ClientRollbackDetail} which
 * already contains the encoded bytes.
 *
 * This method looks very similar to the {@code StoreHandler.encodeRollbackDetail} method, but is intended to have
 * them separated. This is to allow client side classes be moved to separate module without any dependency
 * on the server side (this can also be done with a util method in a common module, but it is kind of overkill
 * for a simple method like this for now).
 *
 * @param rollbackDetail the rollback information to encode
 * @return a buffer containing the Avro binary encoding of the publish response record
 * @throws IOException if writing the encoded record fails
 */
private ByteBuffer encodeRollbackDetail(RollbackDetail rollbackDetail) throws IOException {
  // Constructs the response object as GenericRecord
  Schema schema = Schemas.V1.PublishResponse.SCHEMA;
  GenericRecord record = new GenericData.Record(schema);
  record.put("transactionWritePointer", rollbackDetail.getTransactionWritePointer());

  // The rollback range is a nested record; its schema is taken from the response schema's field
  GenericRecord rollbackRange = new GenericData.Record(schema.getField("rollbackRange").schema());
  rollbackRange.put("startTimestamp", rollbackDetail.getStartTimestamp());
  rollbackRange.put("startSequenceId", rollbackDetail.getStartSequenceId());
  rollbackRange.put("endTimestamp", rollbackDetail.getEndTimestamp());
  rollbackRange.put("endSequenceId", rollbackDetail.getEndSequenceId());
  record.put("rollbackRange", rollbackRange);

  ExposedByteArrayOutputStream os = new ExposedByteArrayOutputStream();
  Encoder encoder = EncoderFactory.get().directBinaryEncoder(os, null);

  // The writer must use the same schema the record was built with (the publish response schema);
  // a writer walks the fields of its own schema, so any other schema would not match this record.
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  datumWriter.write(record, encoder);
  return os.toByteBuffer();
}
Aggregations