Use of org.apache.avro.mapred.AvroKey in project crunch by Cloudera.
Class AvroIndexedRecordPartitionerTest, method testGetPartition_IntegerMinValue.
/**
 * Checks that a key wrapping a mock record built with {@code Integer.MIN_VALUE}
 * is assigned partition 0 when {@code Integer.MAX_VALUE} partitions are requested.
 */
@Test
public void testGetPartition_IntegerMinValue() {
    AvroKey<IndexedRecord> key =
            new AvroKey<IndexedRecord>(new MockIndexedRecord(Integer.MIN_VALUE));

    int partition = avroPartitioner.getPartition(key, new AvroValue<Object>(), Integer.MAX_VALUE);

    assertEquals(0, partition);
}
Use of org.apache.avro.mapred.AvroKey in project crunch by Cloudera.
Class AvroIndexedRecordPartitionerTest, method testGetPartition.
/**
 * Checks the partition chosen for a key wrapping a mock record built with 3:
 * partition 3 is expected out of 5, and partition 1 out of 2.
 */
@Test
public void testGetPartition() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(3));

    int outOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, outOfFive);

    int outOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, outOfTwo);
}
Use of org.apache.avro.mapred.AvroKey in project crunch by Cloudera.
Class AvroIndexedRecordPartitionerTest, method testGetPartition_NegativeHashValue.
/**
 * Checks the partition chosen for a key wrapping a mock record built with -3:
 * the expected partitions (3 out of 5, 1 out of 2) are non-negative.
 */
@Test
public void testGetPartition_NegativeHashValue() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(-3));

    int outOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, outOfFive);

    int outOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, outOfTwo);
}
Use of org.apache.avro.mapred.AvroKey in project crunch by Cloudera.
Class SafeAvroSerialization, method getSerializer.
/**
 * Builds the serializer for the given Avro wrapper class.
 *
 * <p>The writer schema is chosen from the job configuration: the job output
 * schema when {@code c} is exactly {@link AvroWrapper}, otherwise the key or
 * value half of the map output pair schema, depending on whether {@code c}
 * is an {@link AvroKey} type.
 *
 * @param c the wrapper class to be serialized
 * @return a serializer backed by a reflect datum writer set to the chosen schema
 */
public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
    // A plain AvroWrapper marks final output; AvroKey or AvroValue mark map output.
    boolean finalOutput = c.equals(AvroWrapper.class);
    Configuration conf = getConf();

    Schema schema;
    if (finalOutput) {
        schema = AvroJob.getOutputSchema(conf);
    } else {
        boolean keySide = AvroKey.class.isAssignableFrom(c);
        Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
        if (keySide) {
            schema = Pair.getKeySchema(mapOutputSchema);
        } else {
            schema = Pair.getValueSchema(mapOutputSchema);
        }
    }

    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
    ReflectDatumWriter<T> datumWriter = factory.getWriter();
    datumWriter.setSchema(schema);
    return new AvroWrapperSerializer(datumWriter);
}
Use of org.apache.avro.mapred.AvroKey in project components by Talend.
Class SimpleRecordFormatAvroIO, method read.
/**
 * Reads Avro data from the configured path and exposes it as a collection of
 * {@link IndexedRecord}s.
 *
 * <p>The pipeline reads key/value pairs from an {@code AvroHdfsFileSource},
 * keeps only the keys, extracts the record from each {@link AvroKey}, and
 * converts the result to indexed records.
 *
 * @param in the pipeline start the read is applied to
 * @return the records read from the source
 */
@Override
public PCollection<IndexedRecord> read(PBegin in) {
    // One coder instance is shared by the source and the extracted records.
    LazyAvroCoder<Object> sharedCoder = LazyAvroCoder.of();

    AvroHdfsFileSource avroSource = AvroHdfsFileSource.of(doAs, path, sharedCoder);
    avroSource.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());
    avroSource.setLimit(limit);

    PCollection<KV<AvroKey, NullWritable>> keyValues =
            in.apply(Read.from(avroSource)).setCoder(avroSource.getDefaultOutputCoder());

    // Only the key carries data; the NullWritable value is discarded.
    PCollection<AvroKey> keys = keyValues.apply(Keys.<AvroKey>create());

    PCollection<Object> extracted = keys.apply(ParDo.of(new ExtractRecordFromAvroKey()));
    PCollection<Object> records = extracted.setCoder(sharedCoder);

    return records.apply(ConvertToIndexedRecord.<Object>of());
}
Aggregations