Use of org.apache.hadoop.mapred.OutputCollector in project parquet-mr by apache.
Class ParquetValueScheme, method sink:
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
  TupleEntry tuple = sc.getOutgoingEntry();
  if (tuple.size() != 1) {
    throw new RuntimeException("ParquetValueScheme expects tuples with an arity of exactly 1, but found "
        + tuple.getFields());
  }
  T value = (T) tuple.getObject(0);
  OutputCollector output = sc.getOutput();
  output.collect(null, value);
}
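The null key follows the mapred OutputCollector convention for output formats that ignore the key and persist only the value, as Parquet's does. Below is a minimal sketch of a capturing collector that could exercise a sink like this in a unit test; the CapturingCollector class and its use are illustrative, not part of parquet-mr.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapred.OutputCollector;

// Illustrative helper: records every value passed to collect().
class CapturingCollector<K, V> implements OutputCollector<K, V> {
  final List<V> values = new ArrayList<>();

  @Override
  public void collect(K key, V value) throws IOException {
    // Parquet-style sinks pass key == null; only the value carries data.
    values.add(value);
  }
}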
Use of org.apache.hadoop.mapred.OutputCollector in project SpyGlass by ParallelAI.
Class JDBCScheme, method sink:
@Override
public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
  // it's ok to use NULL here so the collector does not write anything
  TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
  OutputCollector outputCollector = sinkCall.getOutput();
  if (updateBy != null) {
    Tuple allValues = tupleEntry.selectTuple(updateValueFields);
    Tuple updateValues = tupleEntry.selectTuple(updateByFields);
    allValues = cleanTuple(allValues);
    TupleRecord key = new TupleRecord(allValues);
    if (updateValues.equals(updateIfTuple))
      outputCollector.collect(key, null);
    else
      outputCollector.collect(key, key);
    return;
  }
  Tuple result = tupleEntry.selectTuple(getSinkFields());
  result = cleanTuple(result);
  outputCollector.collect(new TupleRecord(result), null);
}
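Here the value slot of collect() doubles as a flag: a null value appears to request a plain insert, while a non-null value asks the downstream writer to perform an update instead. A hedged sketch of how a writer could dispatch on that convention follows; StatementDispatcher and its insert/update helpers are hypothetical names, not the SpyGlass API, and the insert-vs-update reading is an assumption from the code above.

import java.io.IOException;
import org.apache.hadoop.mapred.OutputCollector;

// Hypothetical sketch of the null-value convention used in sink() above.
class StatementDispatcher implements OutputCollector<Object, Object> {
  @Override
  public void collect(Object key, Object value) throws IOException {
    if (value == null) {
      insert(key);  // (key, null) -> plain INSERT
    } else {
      update(key);  // (key, key)  -> UPDATE matched on the updateBy fields
    }
  }

  private void insert(Object record) { /* bind fields to an INSERT statement */ }
  private void update(Object record) { /* bind fields to an UPDATE statement */ }
}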
Use of org.apache.hadoop.mapred.OutputCollector in project SpyGlass by ParallelAI.
Class HBaseRawScheme, method sink:
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
  TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
  OutputCollector outputCollector = sinkCall.getOutput();
  Tuple key = tupleEntry.selectTuple(RowKeyField);
  Object okey = key.getObject(0);
  ImmutableBytesWritable keyBytes = getBytes(okey);
  Put put = new Put(keyBytes.get());
  Fields outFields = tupleEntry.getFields().subtract(RowKeyField);
  if (null != outFields) {
    TupleEntry values = tupleEntry.selectEntry(outFields);
    for (int n = 0; n < values.getFields().size(); n++) {
      Object o = values.get(n);
      ImmutableBytesWritable valueBytes = getBytes(o);
      Comparable field = outFields.get(n);
      ColumnName cn = parseColumn((String) field);
      if (null == cn.family) {
        if (n >= familyNames.length)
          cn.family = familyNames[familyNames.length - 1];
        else
          cn.family = familyNames[n];
      }
      if (null != o || writeNulls)
        put.add(Bytes.toBytes(cn.family), Bytes.toBytes(cn.name), valueBytes.get());
    }
  }
  outputCollector.collect(null, put);
}
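The field names are evidently interpreted as "family:qualifier" pairs, with bare names falling back to the scheme's configured familyNames. A plausible reconstruction of the parseColumn helper under that assumption (the real SpyGlass implementation may differ):

// Hypothetical reconstruction, assuming the conventional "family:qualifier"
// field-name syntax used elsewhere in HBase tooling.
class ColumnName {
  String family; // null when the field name carries no "family:" prefix
  String name;
}

static ColumnName parseColumn(String field) {
  ColumnName cn = new ColumnName();
  int sep = field.indexOf(':');
  if (sep >= 0) {
    cn.family = field.substring(0, sep);
    cn.name = field.substring(sep + 1);
  } else {
    cn.name = field; // no family given; sink() falls back to familyNames[n]
  }
  return cn;
}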
Use of org.apache.hadoop.mapred.OutputCollector in project hbase by apache.
Class TestGroupingTableMap, method shouldCreateNewKey:
@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateNewKey() throws Exception {
  GroupingTableMap gTableMap = null;
  try {
    Result result = mock(Result.class);
    Reporter reporter = mock(Reporter.class);
    final byte[] bSeparator = Bytes.toBytes(" ");
    gTableMap = new GroupingTableMap();
    Configuration cfg = new Configuration();
    cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
    JobConf jobConf = new JobConf(cfg);
    gTableMap.configure(jobConf);
    final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
    final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
    byte[] row = {};
    List<Cell> cells = ImmutableList.<Cell>of(
        new KeyValue(row, Bytes.toBytes("familyA"), Bytes.toBytes("qualifierA"), firstPartKeyValue),
        new KeyValue(row, Bytes.toBytes("familyB"), Bytes.toBytes("qualifierB"), secondPartKeyValue));
    when(result.listCells()).thenReturn(cells);
    final AtomicBoolean outputCollected = new AtomicBoolean();
    OutputCollector<ImmutableBytesWritable, Result> outputCollector =
        new OutputCollector<ImmutableBytesWritable, Result>() {
          @Override
          public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
            assertArrayEquals(
                org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.concat(
                    firstPartKeyValue, bSeparator, secondPartKeyValue),
                arg.copyBytes());
            outputCollected.set(true);
          }
        };
    gTableMap.map(null, result, outputCollector, reporter);
    verify(result).listCells();
    Assert.assertTrue("Output not received", outputCollected.get());
    final byte[] firstPartValue = Bytes.toBytes("238947928");
    final byte[] secondPartValue = Bytes.toBytes("4678456942345");
    byte[][] data = { firstPartValue, secondPartValue };
    ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
    assertArrayEquals(
        org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.concat(
            firstPartValue, bSeparator, secondPartValue),
        byteWritable.get());
  } finally {
    if (gTableMap != null)
      gTableMap.close();
  }
}
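Both assertions pin down the same behavior: the group key is the configured column values joined by a single space. A minimal sketch of logic consistent with what the test expects (the actual GroupingTableMap implementation in hbase may differ in detail):

// Sketch: join the extracted column values with " " to form the map output key.
static ImmutableBytesWritable createGroupKeySketch(byte[][] vals) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < vals.length; i++) {
    if (i > 0) {
      sb.append(" ");
    }
    sb.append(Bytes.toString(vals[i]));
  }
  return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
}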
Use of org.apache.hadoop.mapred.OutputCollector in project elephant-bird by twitter.
Class TestCombinedSequenceFile, method testHadoopConf:
@Test
public void testHadoopConf() {
  CombinedSequenceFile csfScheme = new CombinedSequenceFile(Fields.ALL);
  JobConf conf = new JobConf();
  FlowProcess fp = new HadoopFlowProcess();
  Tap<JobConf, RecordReader, OutputCollector> tap = new TempHfs(conf, "test", CombinedSequenceFile.class, false);
  csfScheme.sourceConfInit(fp, tap, conf);
  assertEquals("MapReduceInputFormatWrapper should wrap mapred.SequenceFileInputFormat",
      "org.apache.hadoop.mapred.SequenceFileInputFormat",
      conf.get(MapReduceInputFormatWrapper.CLASS_CONF_KEY));
  assertEquals("Delegate combiner should wrap MapReduceInputFormatWrapper",
      "com.twitter.elephantbird.mapreduce.input.MapReduceInputFormatWrapper",
      conf.get(DelegateCombineFileInputFormat.COMBINED_INPUT_FORMAT_DELEGATE));
  assertEquals("DeprecatedInputFormatWrapper should wrap Delegate combiner",
      "com.twitter.elephantbird.mapreduce.input.combine.DelegateCombineFileInputFormat",
      conf.get(DeprecatedInputFormatWrapper.CLASS_CONF_KEY));
}
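The three assertions describe a delegation chain of input-format wrappers. Reading the same keys back makes the nesting explicit; this snippet, meant to run inside the test above, only echoes values the test already sets and checks:

// DeprecatedInputFormatWrapper -> DelegateCombineFileInputFormat
//   -> MapReduceInputFormatWrapper -> mapred.SequenceFileInputFormat
String outer = conf.get(DeprecatedInputFormatWrapper.CLASS_CONF_KEY);
String middle = conf.get(DelegateCombineFileInputFormat.COMBINED_INPUT_FORMAT_DELEGATE);
String inner = conf.get(MapReduceInputFormatWrapper.CLASS_CONF_KEY);
System.out.println(outer + " -> " + middle + " -> " + inner);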