Use of org.apache.avro.mapred.AvroKey in project spark-dataflow by cloudera.
From the class TransformTranslator, method readAvro.
private static <T> TransformEvaluator<AvroIO.Read.Bound<T>> readAvro() {
  return new TransformEvaluator<AvroIO.Read.Bound<T>>() {
    @Override
    public void evaluate(AvroIO.Read.Bound<T> transform, EvaluationContext context) {
      String pattern = transform.getFilepattern();
      JavaSparkContext jsc = context.getSparkContext();
      // Read the Avro files as (AvroKey, NullWritable) pairs and keep only the keys.
      @SuppressWarnings("unchecked")
      JavaRDD<AvroKey<T>> avroFile = (JavaRDD<AvroKey<T>>) (JavaRDD<?>)
          jsc.newAPIHadoopFile(pattern,
                               AvroKeyInputFormat.class,
                               AvroKey.class,
                               NullWritable.class,
                               new Configuration())
             .keys();
      // Unwrap each AvroKey to its datum, then wrap each element into a windowed value.
      JavaRDD<WindowedValue<T>> rdd = avroFile
          .map(new Function<AvroKey<T>, T>() {
            @Override
            public T call(AvroKey<T> key) {
              return key.datum();
            }
          })
          .map(WindowingHelpers.<T>windowFunction());
      context.setOutputRDD(transform, rdd);
    }
  };
}
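For context, a minimal, self-contained sketch of the same read pattern outside the Dataflow translator is shown below: reading Avro files with AvroKeyInputFormat through newAPIHadoopFile and unwrapping each AvroKey into its GenericRecord datum. The file pattern, application name, and class name are hypothetical placeholders, not values from the project above.

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class AvroKeyReadSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[*]", "avro-key-read-sketch");
    // newAPIHadoopFile yields (AvroKey, NullWritable) pairs; the Avro datum lives in the key.
    @SuppressWarnings("unchecked")
    JavaRDD<AvroKey<GenericRecord>> keys = (JavaRDD<AvroKey<GenericRecord>>) (JavaRDD<?>)
        jsc.newAPIHadoopFile("/tmp/events/*.avro",   // hypothetical file pattern
                             AvroKeyInputFormat.class,
                             AvroKey.class,
                             NullWritable.class,
                             new Configuration())
           .keys();
    // Unwrap the AvroKey wrapper to get at the underlying Avro records.
    JavaRDD<GenericRecord> records = keys.map(key -> key.datum());
    System.out.println("record count: " + records.count());
    jsc.stop();
  }
}

The unchecked cast mirrors the translator above: the raw AvroKey/NullWritable classes passed to newAPIHadoopFile lose the element type, so the key RDD is re-typed before mapping.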
Use of org.apache.avro.mapred.AvroKey in project incubator-gobblin by apache.
From the class AvroKeyDedupReducerTest, method testReduce.
@Test
public void testReduce() throws IOException, InterruptedException {
  Schema keySchema = new Schema.Parser().parse(KEY_SCHEMA);
  GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema());
  keyRecordBuilder.set("partitionKey", 1);
  keyRecordBuilder.set("environment", "test");
  keyRecordBuilder.set("subKey", "2");
  GenericRecord record = keyRecordBuilder.build();
  keyRecordBuilder = new GenericRecordBuilder(keySchema);
  keyRecordBuilder.set("key", record);
  GenericRecord keyRecord = keyRecordBuilder.build();

  // Test reducer with delta field "scn"
  Schema fullSchema = new Schema.Parser().parse(FULL_SCHEMA);
  AvroValue<GenericRecord> fullRecord1 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord2 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord3 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord4 = new AvroValue<>();
  GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema);
  fullRecordBuilder1.set("key", record);
  fullRecordBuilder1.set("scn", 123);
  fullRecordBuilder1.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 125);
  fullRecordBuilder1.set("scn2", 1);
  fullRecord2.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 124);
  fullRecordBuilder1.set("scn2", 10);
  fullRecord3.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 122);
  fullRecordBuilder1.set("scn2", 1000);
  fullRecord4.datum(fullRecordBuilder1.build());

  Configuration conf = mock(Configuration.class);
  when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName());
  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json");
  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME,
      FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME);

  AvroKeyDedupReducer reducer = new AvroKeyDedupReducer();
  WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class);
  when(reducerContext.getConfiguration()).thenReturn(conf);
  Counter moreThan1Counter = new GenericCounter();
  when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter);
  Counter dedupedCounter = new GenericCounter();
  when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter);
  Counter recordCounter = new GenericCounter();
  when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter);
  reducer.setup(reducerContext);
  doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class));

  List<AvroValue<GenericRecord>> valueIterable =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
  AvroKey<GenericRecord> key = new AvroKey<>();
  key.datum(keyRecord);
  reducer.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum());

  // Test reducer without delta field
  Configuration conf2 = mock(Configuration.class);
  when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null);
  when(reducerContext.getConfiguration()).thenReturn(conf2);
  AvroKeyDedupReducer reducer2 = new AvroKeyDedupReducer();
  reducer2.setup(reducerContext);
  reducer2.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum());

  // Test reducer with compound delta key.
  Schema fullSchema2 = new Schema.Parser().parse(FULL_SCHEMA_WITH_TWO_DELTA_FIELDS);
  GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2);
  fullRecordBuilder2.set("key", record);
  fullRecordBuilder2.set("scn", 123);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 125);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord2.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 126);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord3.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 130);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord4.datum(fullRecordBuilder2.build());
  List<AvroValue<GenericRecord>> valueIterable2 =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
  reducer.reduce(key, valueIterable2, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum());
}
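As a side note on the wrapper types exercised by this test, the following minimal sketch shows that AvroKey and AvroValue are thin mutable holders around an Avro datum, set and read through datum(). The record schema here is a hypothetical stand-in; the test's KEY_SCHEMA, FULL_SCHEMA, and FULL_SCHEMA_WITH_TWO_DELTA_FIELDS constants are not shown in the snippet above.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;

public class AvroKeyValueWrapperSketch {
  // Hypothetical schema, for illustration only.
  private static final String EXAMPLE_SCHEMA =
      "{\"type\":\"record\",\"name\":\"Event\",\"fields\":["
      + "{\"name\":\"scn\",\"type\":\"long\"},"
      + "{\"name\":\"scn2\",\"type\":\"long\"}]}";

  public static void main(String[] args) {
    Schema schema = new Schema.Parser().parse(EXAMPLE_SCHEMA);
    GenericRecord record = new GenericRecordBuilder(schema)
        .set("scn", 123L)
        .set("scn2", 100L)
        .build();

    // The same record can travel as a map key or a map value; both wrappers
    // expose datum() / datum(T) to read and replace the wrapped record.
    AvroKey<GenericRecord> key = new AvroKey<>(record);
    AvroValue<GenericRecord> value = new AvroValue<>();
    value.datum(record);

    System.out.println("key datum:   " + key.datum());
    System.out.println("value datum: " + value.datum());
  }
}

The assertions in the test above rely on exactly this pattern: the reducer receives its candidate records wrapped in AvroValue objects and exposes the surviving record through the datum of its output AvroKey.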