use of org.apache.orc.mapred.OrcValue in project incubator-gobblin by apache.
the class OrcKeyDedupReducer method initReusableObject.
/**
 * Sets up the output key and output value holders used by this reducer.
 */
@Override
protected void initReusableObject() {
  // The value slot gets a fresh, empty OrcValue.
  outValue = new OrcValue();
  // The key slot is whatever NullWritable.get() hands back.
  outKey = NullWritable.get();
}
use of org.apache.orc.mapred.OrcValue in project incubator-gobblin by apache.
the class OrcKeyDedupReducer method reduce.
/**
 * Writes the first occurrence of every distinct value grouped under {@code key} and then
 * reports, per distinct value, how many times it occurred.
 *
 * <p>Two values are considered the same when the {@code hashCode()} of their
 * {@code OrcStruct} payloads is equal; only the hash is remembered, not the value itself.
 * Consequently, distinct values whose hash codes coincide are counted as duplicates and only
 * the first of them is written.
 *
 * @param key the shuffle key shared by all {@code values} (not read by this method)
 * @param values the values grouped under {@code key}; each {@code value.value} is cast to
 *     {@code OrcStruct}
 * @param context the task context handed to {@code writeRetainedValue} and
 *     {@code updateCounters}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void reduce(OrcKey key, Iterable<OrcValue> values, Context context) throws IOException, InterruptedException {
  /* Map from the hash of a value (typed as OrcStruct) to the number of times it occurred. */
  Map<Integer, Integer> valuesToRetain = new HashMap<>();
  for (OrcValue value : values) {
    int valueHash = ((OrcStruct) value.value).hashCode();
    // merge() yields the updated count; a count of 1 means this hash has not been seen before,
    // so this is the occurrence that gets written out.
    if (valuesToRetain.merge(valueHash, 1, Integer::sum) == 1) {
      writeRetainedValue(value, context);
    }
  }
  /* At this point valuesToRetain holds one entry per distinct value hash. */
  for (Integer occurrences : valuesToRetain.values()) {
    updateCounters(occurrences, context);
  }
}
use of org.apache.orc.mapred.OrcValue in project incubator-gobblin by apache.
the class OrcValueMapper method setup.
/**
 * Initializes the mapper's per-task state: the job configuration, the output key/value
 * holders, and the input and shuffle-key schemas read from the task configuration.
 *
 * @param context the task context whose configuration supplies the job settings and schemas
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  this.jobConf = new JobConf(context.getConfiguration());

  // Create both output holders first, then configure each with the job configuration.
  this.outKey = new OrcKey();
  this.outValue = new OrcValue();
  this.outKey.configure(jobConf);
  this.outValue.configure(jobConf);

  // This is the consistent input-schema among all mappers.
  final String inputSchemaText = context.getConfiguration().get(OrcConf.MAPRED_INPUT_SCHEMA.getAttribute());
  this.mrInputSchema = TypeDescription.fromString(inputSchemaText);

  // The shuffle-key schema is stored under its own configuration attribute.
  final String shuffleKeySchemaText = context.getConfiguration().get(MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute());
  this.shuffleKeySchema = TypeDescription.fromString(shuffleKeySchemaText);
}