use of org.apache.hadoop.io.serializer.Serializer in project crunch by cloudera.
the class CrunchInputSplit method write.
public void write(DataOutput out) throws IOException {
  out.writeInt(nodeIndex);
  out.writeInt(extraConf.size());
  for (Map.Entry<String, String> e : extraConf.entrySet()) {
    out.writeUTF(e.getKey());
    out.writeUTF(e.getValue());
  }
  Text.writeString(out, inputFormatClass.getName());
  Text.writeString(out, inputSplit.getClass().getName());
  SerializationFactory factory = new SerializationFactory(conf);
  Serializer serializer = factory.getSerializer(inputSplit.getClass());
  serializer.open((DataOutputStream) out);
  serializer.serialize(inputSplit);
}
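For context, here is a minimal sketch of the matching read path, reversing the write above with a Deserializer from the same SerializationFactory. The field names (nodeIndex, extraConf, inputSplit, conf) mirror the snippet, but the readClass helper and the overall body are illustrative assumptions, not the verbatim Crunch source:

@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  nodeIndex = in.readInt();
  int numEntries = in.readInt();
  extraConf = new HashMap<String, String>();
  for (int i = 0; i < numEntries; i++) {
    extraConf.put(in.readUTF(), in.readUTF());
  }
  inputFormatClass = (Class<? extends InputFormat>) readClass(in);
  Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  SerializationFactory factory = new SerializationFactory(conf);
  Deserializer deserializer = factory.getDeserializer(inputSplitClass);
  deserializer.open((DataInputStream) in);
  // deserialize(null) lets the chosen Serialization allocate a fresh instance
  inputSplit = (InputSplit) deserializer.deserialize(null);
}

// hypothetical helper: resolves a class name written with Text.writeString
private Class<?> readClass(DataInput in) throws IOException {
  String className = Text.readString(in);
  try {
    return conf.getClassByName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load class " + className, e);
  }
}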
use of org.apache.hadoop.io.serializer.Serializer in project hadoop by apache.
the class TaggedInputSplit method write.
@SuppressWarnings("unchecked")
public void write(DataOutput out) throws IOException {
Text.writeString(out, inputSplitClass.getName());
Text.writeString(out, inputFormatClass.getName());
Text.writeString(out, mapperClass.getName());
SerializationFactory factory = new SerializationFactory(conf);
Serializer serializer = factory.getSerializer(inputSplitClass);
serializer.open((DataOutputStream) out);
serializer.serialize(inputSplit);
}
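This pattern works because SerializationFactory consults the io.serializations setting; with the defaults, WritableSerialization accepts any Writable class, so serialize() simply delegates to the split's own write(). A self-contained sketch of that resolution (the FileSplit arguments and the byte-count print are illustrative choices, not part of the Hadoop snippet above):

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitSerializationDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);
    // FileSplit is Writable, so the default WritableSerialization is selected.
    Serializer<FileSplit> serializer = factory.getSerializer(FileSplit.class);
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    serializer.open(bytes);
    serializer.serialize(new FileSplit(new Path("/tmp/part-00000"), 0L, 128L, new String[0]));
    serializer.close();
    System.out.println("serialized split: " + bytes.size() + " bytes");
  }
}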
use of org.apache.hadoop.io.serializer.Serializer in project tez by apache.
the class TestValuesIterator method createInMemStreams.
/**
 * Creates in-memory segments for merging.
 *
 * @return a list of in-memory segments backed by random test data
 * @throws IOException if a segment cannot be written
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
int numberOfStreams = Math.max(2, rnd.nextInt(10));
LOG.info("No of streams : " + numberOfStreams);
SerializationFactory serializationFactory = new SerializationFactory(conf);
Serializer keySerializer = serializationFactory.getSerializer(keyClass);
Serializer valueSerializer = serializationFactory.getSerializer(valClass);
LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
InputContext context = createTezInputContext();
MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);
DataOutputBuffer keyBuf = new DataOutputBuffer();
DataOutputBuffer valBuf = new DataOutputBuffer();
DataInputBuffer keyIn = new DataInputBuffer();
DataInputBuffer valIn = new DataInputBuffer();
keySerializer.open(keyBuf);
valueSerializer.open(valBuf);
List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
for (int i = 0; i < numberOfStreams; i++) {
BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
InMemoryWriter writer = new InMemoryWriter(bout);
Map<Writable, Writable> data = createData();
// write data
for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
keySerializer.serialize(entry.getKey());
valueSerializer.serialize(entry.getValue());
keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
valIn.reset(valBuf.getData(), 0, valBuf.getLength());
writer.append(keyIn, valIn);
originalData.put(entry.getKey(), entry.getValue());
keyBuf.reset();
valBuf.reset();
keyIn.reset();
valIn.reset();
}
IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length);
segments.add(new TezMerger.Segment(reader, null));
data.clear();
writer.close();
}
return segments;
}
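A hedged sketch of the reverse path for a single record: a Deserializer obtained from the same factory rebuilds the Writable from the raw bytes that were handed to the IFile writer above. Assuming, for illustration only, that keyClass is Text:

// Illustrative only: assumes keyClass is Text; deserialize(null) lets
// WritableSerialization allocate a fresh instance to fill in.
Deserializer<Text> keyDeserializer = serializationFactory.getDeserializer(Text.class);
DataInputBuffer rawKey = new DataInputBuffer();
rawKey.reset(keyBuf.getData(), 0, keyBuf.getLength());
keyDeserializer.open(rawKey);
Text key = keyDeserializer.deserialize(null);
keyDeserializer.close();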
use of org.apache.hadoop.io.serializer.Serializer in project cdap by caskdata.
the class TaggedInputSplit method write.
@SuppressWarnings("unchecked")
@Override
public final void write(DataOutput out) throws IOException {
Class<? extends InputSplit> inputSplitClass = inputSplit.getClass();
Text.writeString(out, inputSplitClass.getName());
writeAdditionalFields(out);
SerializationFactory factory = new SerializationFactory(conf);
Serializer serializer = factory.getSerializer(inputSplitClass);
serializer.open((DataOutputStream) out);
serializer.serialize(inputSplit);
}
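Note that getSerializer() only succeeds when some registered Serialization accepts the split class; a non-Writable split would fail with the default configuration. A minimal sketch of widening io.serializations to cover such classes (AvroReflectSerialization additionally requires the classes to be Avro-annotated or listed under avro.reflect.pkgs):

Configuration conf = new Configuration();
// Keep the default Writable support and add Avro reflect-based serialization.
conf.setStrings("io.serializations",
    org.apache.hadoop.io.serializer.WritableSerialization.class.getName(),
    org.apache.hadoop.io.serializer.avro.AvroReflectSerialization.class.getName());
SerializationFactory factory = new SerializationFactory(conf);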
use of org.apache.hadoop.io.serializer.Serializer in project apex-malhar by apache.
the class MapOperatorTest method testNodeProcessingSchema.
public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper) throws IOException {
  CollectorTestSink sortSink = new CollectorTestSink();
  oper.output.setSink(sortSink);
  oper.setMapClass(WordCount.Map.class);
  oper.setCombineClass(WordCount.Reduce.class);
  oper.setDirName(testMeta.testDir);
  oper.setConfigFile(null);
  oper.setInputFormatClass(TextInputFormat.class);
  Configuration conf = new Configuration();
  JobConf jobConf = new JobConf(conf);
  FileInputFormat.setInputPaths(jobConf, new Path(testMeta.testDir));
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(jobConf);
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
  keySerializer.open(oper.getOutstream());
  keySerializer.serialize(splits[0]);
  oper.setInputSplitClass(splits[0].getClass());
  keySerializer.close();
  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.emitTuples();
  oper.endWindow();
  oper.beginWindow(1);
  oper.emitTuples();
  oper.endWindow();
  Assert.assertEquals("number emitted tuples", 3, sortSink.collectedTuples.size());
  for (Object o : sortSink.collectedTuples) {
    LOG.debug(o.toString());
  }
  LOG.debug("Done testing round\n");
  oper.teardown();
}
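For completeness, a sketch of what the consuming side of this test could look like: the operator would rebuild the split from the bytes the test serialized into its stream. The splitStream variable and the null-instance deserialize are illustrative assumptions, not the actual MapOperator internals:

// Hypothetical consumer: rebuild the split written by the test above.
// splitStream stands in for wherever getOutstream()'s bytes end up.
SerializationFactory factory = new SerializationFactory(new Configuration());
Deserializer deserializer = factory.getDeserializer(inputSplitClass);
deserializer.open(splitStream);
InputSplit restored = (InputSplit) deserializer.deserialize(null);
deserializer.close();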