Use of org.apache.hadoop.io.BoundedByteArrayOutputStream in project tez by apache.
From the class TestValuesIterator, method createInMemStreams.
/**
 * Creates in-memory segments, each backed by a BoundedByteArrayOutputStream.
 *
 * @return the list of in-memory segments
 * @throws IOException if writing a segment fails
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);
  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator,
      context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);
  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);
  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer = new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    // write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader = new InMemoryReader(mergeManager, null,
        bout.getBuffer(), 0, bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, null));
    data.clear();
    writer.close();
  }
  return segments;
}
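For context before the remaining snippets, here is a minimal standalone sketch of the contract these tests lean on: BoundedByteArrayOutputStream refuses to grow past a fixed limit, and getBuffer() hands back the backing array itself rather than a copy, which is why the test above can build an InMemoryReader directly over bout.getBuffer(). The sketch is illustrative and not taken from the Tez sources.

  import java.io.EOFException;
  import org.apache.hadoop.io.BoundedByteArrayOutputStream;

  // Illustrative sketch only; not part of the Tez test code.
  public class BoundedContractSketch {
    public static void main(String[] args) throws Exception {
      // With the single-argument constructor, capacity and write limit coincide.
      BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(4);
      bout.write(new byte[] { 1, 2, 3, 4 });  // fills the buffer exactly
      try {
        bout.write(5);                        // one byte past the limit
      } catch (EOFException expected) {
        // unlike java.io.ByteArrayOutputStream, the stream never grows
      }
      // getBuffer() returns the backing array itself, not a copy.
      byte[] backing = bout.getBuffer();
      System.out.println(backing.length + " bytes capacity, " + bout.size() + " written");
    }
  }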
Use of org.apache.hadoop.io.BoundedByteArrayOutputStream in project tez by apache.
From the class TestTezMerger, method createInMemorySegments.
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment)
    throws IOException {
  List<TezMerger.Segment> segmentList = Lists.newLinkedList();
  Random rnd = new Random();
  DataInputBuffer key = new DataInputBuffer();
  DataInputBuffer value = new DataInputBuffer();
  for (int i = 0; i < segmentCount; i++) {
    BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
    InMemoryWriter writer = new InMemoryWriter(stream);
    for (int j = 0; j < keysPerSegment; j++) {
      populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()), key, value);
      writer.append(key, value);
    }
    writer.close();
    InMemoryReader reader = new InMemoryReader(merger, null, stream.getBuffer(), 0, stream.getLimit());
    segmentList.add(new TezMerger.Segment(reader, null));
  }
  return segmentList;
}
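Note the length argument passed to InMemoryReader: this snippet uses stream.getLimit() where the previous one used bout.getBuffer().length. With the single-argument constructor the two coincide, since the limit defaults to the full capacity, so both spellings describe the same byte range. A short illustrative sketch of the distinction, with arbitrary numbers:

  import org.apache.hadoop.io.BoundedByteArrayOutputStream;

  // Illustrative sketch of the length accessors; not from the test sources.
  public class LimitVsSizeSketch {
    public static void main(String[] args) throws Exception {
      BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
      stream.write(new byte[123]);
      System.out.println(stream.getLimit());          // 10000: the configured cap
      System.out.println(stream.getBuffer().length);  // 10000: same as the cap here
      System.out.println(stream.size());              // 123: bytes actually written
    }
  }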
Use of org.apache.hadoop.io.BoundedByteArrayOutputStream in project tez by apache.
From the class TestIFile, method testWithRLEMarker.
// test with sorted data and repeat keys
@Test(timeout = 5000)
public void testWithRLEMarker() throws IOException {
  // Test with append(Object, Object)
  FSDataOutputStream out = localFs.create(outputPath);
  IFile.Writer writer = new IFile.Writer(defaultConf, out,
      Text.class, IntWritable.class, codec, null, null, true);
  Text key = new Text("key0");
  IntWritable value = new IntWritable(0);
  writer.append(key, value);
  // Same key again (RLE should kick in)
  key = new Text("key0");
  writer.append(key, value);
  assertTrue(writer.sameKey);
  // Different key
  key = new Text("key1");
  writer.append(key, value);
  assertFalse(writer.sameKey);
  writer.close();
  out.close();

  // Test with append(DataInputBuffer key, DataInputBuffer value)
  byte[] kvbuffer = "key1Value1key1Value2key3Value3".getBytes();
  int keyLength = 4;
  int valueLength = 6;
  int pos = 0;
  out = localFs.create(outputPath);
  writer = new IFile.Writer(defaultConf, out,
      Text.class, IntWritable.class, codec, null, null, true);
  BoundedByteArrayOutputStream boundedOut = new BoundedByteArrayOutputStream(1024 * 1024);
  Writer inMemWriter = new InMemoryWriter(boundedOut, true);
  DataInputBuffer kin = new DataInputBuffer();
  kin.reset(kvbuffer, pos, keyLength);
  DataInputBuffer vin = new DataInputBuffer();
  DataOutputBuffer vout = new DataOutputBuffer();
  (new IntWritable(0)).write(vout);
  vin.reset(vout.getData(), vout.getLength());
  // Write initial KV pair
  writer.append(kin, vin);
  assertFalse(writer.sameKey);
  inMemWriter.append(kin, vin);
  assertFalse(inMemWriter.sameKey);
  pos += (keyLength + valueLength);
  // Second key is identical to key1 (RLE should kick in)
  kin.reset(kvbuffer, pos, keyLength);
  (new IntWritable(0)).write(vout);
  vin.reset(vout.getData(), vout.getLength());
  writer.append(kin, vin);
  assertTrue(writer.sameKey);
  inMemWriter.append(kin, vin);
  assertTrue(inMemWriter.sameKey);
  pos += (keyLength + valueLength);
  // Next key (key3) is different (RLE should not kick in)
  kin.reset(kvbuffer, pos, keyLength);
  (new IntWritable(0)).write(vout);
  vin.reset(vout.getData(), vout.getLength());
  writer.append(kin, vin);
  assertFalse(writer.sameKey);
  inMemWriter.append(kin, vin);
  assertFalse(inMemWriter.sameKey);
  writer.close();
  out.close();
  inMemWriter.close();
  boundedOut.close();
}
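The pos arithmetic in this test follows from the fixed-width record layout of kvbuffer: each record is a 4-byte key followed by a 6-byte value. A standalone sketch (not part of the test) decoding the three records, which shows why the second append sees a key identical to the first:

  // Illustrative sketch: decodes the fixed-width records in kvbuffer.
  public class KvBufferLayoutSketch {
    public static void main(String[] args) {
      byte[] kvbuffer = "key1Value1key1Value2key3Value3".getBytes();
      int keyLength = 4;
      int valueLength = 6;
      for (int pos = 0; pos < kvbuffer.length; pos += keyLength + valueLength) {
        String k = new String(kvbuffer, pos, keyLength);
        String v = new String(kvbuffer, pos + keyLength, valueLength);
        // prints: key1 -> Value1, key1 -> Value2, key3 -> Value3
        System.out.println(k + " -> " + v);
      }
    }
  }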
Use of org.apache.hadoop.io.BoundedByteArrayOutputStream in project hadoop by apache.
From the class TestMergeManager, method fillOutput.
private void fillOutput(InMemoryMapOutput<Text, Text> output) throws IOException {
  BoundedByteArrayOutputStream stream = output.getArrayStream();
  int count = stream.getLimit();
  for (int i = 0; i < count; ++i) {
    stream.write(i);
  }
}
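The loop above writes exactly getLimit() bytes, which lands on the boundary of the bounded stream; a single further write would throw EOFException. A hedged sketch of that boundary behavior (InMemoryMapOutput and getArrayStream are not reproduced here, only the stream itself):

  import java.io.EOFException;
  import org.apache.hadoop.io.BoundedByteArrayOutputStream;

  // Illustrative sketch of the fill-to-limit boundary the loop above relies on.
  public class FillToLimitSketch {
    public static void main(String[] args) throws Exception {
      BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(16);
      for (int i = 0; i < stream.getLimit(); ++i) {
        stream.write(i);            // 16 writes fill the stream exactly
      }
      try {
        stream.write(0);            // the 17th write is past the limit
      } catch (EOFException expected) {
        System.out.println("stream is full at its limit");
      }
    }
  }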
Use of org.apache.hadoop.io.BoundedByteArrayOutputStream in project tez by apache.
From the class TestIFile, method testInMemoryWriter.
// Test InMemoryWriter
@Test(timeout = 5000)
public void testInMemoryWriter() throws IOException {
  InMemoryWriter writer = null;
  BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
  List<KVPair> data = KVDataGen.generateTestData(true, 10);
  // No RLE, no repeat keys, no compression
  writer = new InMemoryWriter(bout);
  writeTestFileUsingDataBuffer(writer, false, data);
  readUsingInMemoryReader(bout.getBuffer(), data);
  // No RLE, repeat keys, no compression
  bout.reset();
  writer = new InMemoryWriter(bout);
  writeTestFileUsingDataBuffer(writer, true, data);
  readUsingInMemoryReader(bout.getBuffer(), data);
  // RLE, no repeat keys, no compression
  bout.reset();
  writer = new InMemoryWriter(bout, true);
  writeTestFileUsingDataBuffer(writer, false, data);
  readUsingInMemoryReader(bout.getBuffer(), data);
  // RLE, repeat keys, no compression
  bout.reset();
  writer = new InMemoryWriter(bout, true);
  writeTestFileUsingDataBuffer(writer, true, data);
  readUsingInMemoryReader(bout.getBuffer(), data);
}
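The bout.reset() calls are what let a single buffer back all four writer configurations: reset() rewinds the write pointer and restores the full limit without reallocating the backing array. A small illustrative sketch, not from the test sources:

  import org.apache.hadoop.io.BoundedByteArrayOutputStream;

  // Illustrative sketch of reusing one buffer across writers via reset().
  public class ResetReuseSketch {
    public static void main(String[] args) throws Exception {
      BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024);
      bout.write(new byte[100]);
      bout.reset();                     // rewind; the full capacity is available again
      System.out.println(bout.size());  // 0
      bout.write(new byte[1024]);       // succeeds: limit restored to full capacity
    }
  }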