Use of org.apache.hadoop.io.LongWritable in project cdap by caskdata: class StreamInputFormatTest, method testStreamRecordReader.
@Test
public void testStreamRecordReader() throws Exception {
  File inputDir = tmpFolder.newFolder();
  File partition = new File(inputDir, "1.1000");
  partition.mkdirs();
  File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
  File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
  // write 1 event
  StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
  writer.append(StreamFileTestUtils.createEvent(1000, "test"));
  writer.flush();
  // get splits from the input format. Expect to get 2 splits,
  // one from 0 - some offset and one from offset - Long.MAX_VALUE.
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
  AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
  AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

    @Override
    public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
      return new NoOpAuthorizer();
    }

    @Override
    public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
      return new AuthenticationTestContext();
    }
  };
  List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
  Assert.assertEquals(2, splits.size());
  // write another event so that the 2nd split has something to read
  writer.append(StreamFileTestUtils.createEvent(1001, "test"));
  writer.close();
  // create a record reader for the 2nd split
  StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(new IdentityStreamEventDecoder(), new NoOpAuthorizer(), new AuthenticationTestContext(), DUMMY_ID);
  recordReader.initialize(splits.get(1), context);
  // check that we read the 2nd stream event
  Assert.assertTrue(recordReader.nextKeyValue());
  StreamEvent output = recordReader.getCurrentValue();
  Assert.assertEquals(1001, output.getTimestamp());
  Assert.assertEquals("test", Bytes.toString(output.getBody()));
  // check that there is nothing more to read
  Assert.assertFalse(recordReader.nextKeyValue());
}
Use of org.apache.hadoop.io.LongWritable in project cdap by caskdata: class StreamInputFormatTest, method testStringStreamEventDecoder.
@Test
public void testStringStreamEventDecoder() {
  String body = "Testing";
  StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), Charsets.UTF_8.encode(body));
  StreamEventDecoder<LongWritable, String> decoder = new StringStreamEventDecoder();
  StreamEventDecoder.DecodeResult<LongWritable, String> result = new StreamEventDecoder.DecodeResult<>();
  result = decoder.decode(event, result);
  Assert.assertEquals(event.getTimestamp(), result.getKey().get());
  Assert.assertEquals(body, result.getValue());
}
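Conceptually, StringStreamEventDecoder maps a stream event to a (timestamp, body) pair: the timestamp travels as a LongWritable key and the body is decoded as UTF-8 text. Below is a framework-free sketch of that mapping; SimpleDecodedEvent is a hypothetical stand-in that only illustrates the shape of DecodeResult and is not part of the CDAP API.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.LongWritable;

// Hypothetical stand-in for StreamEventDecoder.DecodeResult<LongWritable, String>.
final class SimpleDecodedEvent {
  final LongWritable key;
  final String value;

  SimpleDecodedEvent(long timestamp, ByteBuffer body) {
    // Key the record by event timestamp and decode the body as UTF-8.
    this.key = new LongWritable(timestamp);
    this.value = StandardCharsets.UTF_8.decode(body.duplicate()).toString();
  }
}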
Use of org.apache.hadoop.io.LongWritable in project tez by apache: class TestTezMerger, method createDataForIFile.
/**
 * Generate a data set for the IFile. Creates repeated keys if needed.
 *
 * @param keyCount approximate number of keys to be created
 * @param repeatCount number of times a key should be repeated
 * @return the generated key/value pairs as a sorted multimap
 */
static TreeMultimap<Integer, Long> createDataForIFile(int keyCount, int repeatCount) {
  TreeMultimap<Integer, Long> dataSet = TreeMultimap.create();
  Random rnd = new Random();
  for (int i = 0; i < keyCount; i++) {
    if (repeatCount > 0 && (rnd.nextInt(keyCount) % 2 == 0)) {
      // repeat this key
      for (int j = 0; j < repeatCount; j++) {
        IntWritable key = new IntWritable(rnd.nextInt(keyCount));
        LongWritable value = new LongWritable(System.nanoTime());
        dataSet.put(key.get(), value.get());
      }
      i += repeatCount;
      LOG.info("Repeated key count=" + (repeatCount));
    } else {
      IntWritable key = new IntWritable(rnd.nextInt(keyCount));
      LongWritable value = new LongWritable(System.nanoTime());
      dataSet.put(key.get(), value.get());
    }
  }
  for (Integer key : dataSet.keySet()) {
    for (Long value : dataSet.get(key)) {
      LOG.info("Key=" + key + ", val=" + value);
    }
  }
  LOG.info("=============");
  return dataSet;
}
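A small, hypothetical caller of the helper above (not part of the original test class, and assumed to live in TestTezMerger next to createDataForIFile) could verify that the generated map is non-empty and key-sorted, since TreeMultimap keeps keys in their natural order:

// Hypothetical check, assuming it is placed in TestTezMerger alongside createDataForIFile.
@Test
public void testCreateDataForIFileIsKeySorted() {
  TreeMultimap<Integer, Long> data = createDataForIFile(100, 2);
  Assert.assertFalse(data.isEmpty());
  Integer previous = null;
  for (Integer key : data.keySet()) {
    if (previous != null) {
      // TreeMultimap iterates keys in ascending natural order.
      Assert.assertTrue(previous < key);
    }
    previous = key;
  }
}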
Use of org.apache.hadoop.io.LongWritable in project tez by apache: class TestTezMerger, method createInMemorySegments.
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment) throws IOException {
  List<TezMerger.Segment> segmentList = Lists.newLinkedList();
  Random rnd = new Random();
  DataInputBuffer key = new DataInputBuffer();
  DataInputBuffer value = new DataInputBuffer();
  for (int i = 0; i < segmentCount; i++) {
    BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
    InMemoryWriter writer = new InMemoryWriter(stream);
    for (int j = 0; j < keysPerSegment; j++) {
      populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()), key, value);
      writer.append(key, value);
    }
    writer.close();
    InMemoryReader reader = new InMemoryReader(merger, null, stream.getBuffer(), 0, stream.getLimit());
    segmentList.add(new TezMerger.Segment(reader, null));
  }
  return segmentList;
}
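The populateData helper used above is not shown in this excerpt. Assuming, from its call site, that it simply serializes the two Writables and points the reusable DataInputBuffers at the serialized bytes, a sketch could look like the following; the method body is an assumption, not the actual Tez helper, and it relies on org.apache.hadoop.io.DataOutputBuffer being imported in the surrounding class.

// Assumed shape of populateData: serialize the writables and reset the
// DataInputBuffers onto the serialized bytes so they can be appended to the IFile writer.
private void populateData(IntWritable intWritable, LongWritable longWritable,
    DataInputBuffer key, DataInputBuffer value) throws IOException {
  DataOutputBuffer keyOut = new DataOutputBuffer();
  DataOutputBuffer valueOut = new DataOutputBuffer();
  intWritable.write(keyOut);
  longWritable.write(valueOut);
  key.reset(keyOut.getData(), keyOut.getLength());
  value.reset(valueOut.getData(), valueOut.getLength());
}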
Use of org.apache.hadoop.io.LongWritable in project tez by apache: class TestUnorderedPartitionedKVWriter, method baseTest.
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent, int availableMemory) throws IOException, InterruptedException {
  PartitionerForTest partitioner = new PartitionerForTest();
  ApplicationId appId = ApplicationId.newInstance(10000000, 1);
  TezCounters counters = new TezCounters();
  String uniqueId = UUID.randomUUID().toString();
  int dagId = 1;
  String auxiliaryService = defaultConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
  OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService);
  Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class, shouldCompress, maxSingleBufferSizeBytes);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, bufferMergePercent);
  CompressionCodec codec = null;
  if (shouldCompress) {
    codec = new DefaultCodec();
    ((Configurable) codec).setConf(conf);
  }
  int numOutputs = numPartitions;
  int numRecordsWritten = 0;
  Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
  for (int i = 0; i < numOutputs; i++) {
    expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
  }
  UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf, numOutputs, availableMemory);
  int sizePerBuffer = kvWriter.sizePerBuffer;
  // IntW + LongW
  int sizePerRecord = 4 + 8;
  // Record + META_OVERHEAD
  int sizePerRecordWithOverhead = sizePerRecord + 12;
  IntWritable intWritable = new IntWritable();
  LongWritable longWritable = new LongWritable();
  BitSet partitionsWithData = new BitSet(numPartitions);
  for (int i = 0; i < numRecords; i++) {
    intWritable.set(i);
    longWritable.set(i);
    int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
    if (skippedPartitions != null && skippedPartitions.contains(partition)) {
      continue;
    }
    partitionsWithData.set(partition);
    expectedValues.get(partition).put(intWritable.get(), longWritable.get());
    kvWriter.write(intWritable, longWritable);
    numRecordsWritten++;
  }
  List<Event> events = kvWriter.close();
  if (numPartitions == 1) {
    assertEquals(true, kvWriter.skipBuffers);
  }
  int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
  int numExpectedSpills = numRecordsWritten / recordsPerBuffer / kvWriter.spillLimit;
  verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class));
  assertNull(kvWriter.currentBuffer);
  assertEquals(0, kvWriter.availableBuffers.size());
  // Verify the counters
  TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
  TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
  TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter additionalSpillBytesWritternCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
  if (numPartitions > 1) {
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
  }
  assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
  long fileOutputBytes = fileOutputBytesCounter.getValue();
  if (numRecordsWritten > 0) {
    assertTrue(fileOutputBytes > 0);
    if (!shouldCompress) {
      assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
    }
  } else {
    assertEquals(0, fileOutputBytes);
  }
  assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
  long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
  long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
  if (numExpectedSpills == 0) {
    assertEquals(0, additionalSpillBytesWritten);
    assertEquals(0, additionalSpillBytesRead);
  } else {
    assertTrue(additionalSpillBytesWritten > 0);
    assertTrue(additionalSpillBytesRead > 0);
    if (!shouldCompress) {
      assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
      assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
    }
  }
  assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead);
  // due to multiple threads, buffers could be merged in chunks in scheduleSpill.
  assertTrue(numExpectedSpills >= numAdditionalSpillsCounter.getValue());
  BitSet emptyPartitionBits = null;
  // Verify the events returned
  assertEquals(2, events.size());
  assertTrue(events.get(0) instanceof VertexManagerEvent);
  VertexManagerEvent vme = (VertexManagerEvent) events.get(0);
  verifyPartitionStats(vme, partitionsWithData);
  assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
  assertEquals(0, cdme.getSourceIndexStart());
  assertEquals(numOutputs, cdme.getCount());
  DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  if (skippedPartitions == null && numRecordsWritten > 0) {
    assertFalse(eventProto.hasEmptyPartitions());
    emptyPartitionBits = new BitSet(numPartitions);
  } else {
    assertTrue(eventProto.hasEmptyPartitions());
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto.getEmptyPartitions());
    emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
    if (numRecordsWritten == 0) {
      assertEquals(numPartitions, emptyPartitionBits.cardinality());
    } else {
      for (Integer e : skippedPartitions) {
        assertTrue(emptyPartitionBits.get(e));
      }
      assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
    }
  }
  if (emptyPartitionBits.cardinality() != numPartitions) {
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());
  } else {
    assertFalse(eventProto.hasHost());
    assertFalse(eventProto.hasPort());
    assertFalse(eventProto.hasPathComponent());
  }
  // Verify the actual data
  TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId, dagId);
  Path outputFilePath = kvWriter.finalOutPath;
  Path spillFilePath = kvWriter.finalIndexPath;
  // Special case for 0 records.
  if (numRecordsWritten <= 0) {
    return;
  }
  assertTrue(localFs.exists(outputFilePath));
  assertTrue(localFs.exists(spillFilePath));
  // verify no intermediate spill files have been left around
  synchronized (kvWriter.spillInfoList) {
    for (SpillInfo spill : kvWriter.spillInfoList) {
      assertFalse("lingering intermediate spill file " + spill.outPath, localFs.exists(spill.outPath));
    }
  }
  TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
  DataInputBuffer keyBuffer = new DataInputBuffer();
  DataInputBuffer valBuffer = new DataInputBuffer();
  IntWritable keyDeser = new IntWritable();
  LongWritable valDeser = new LongWritable();
  for (int i = 0; i < numOutputs; i++) {
    TezIndexRecord indexRecord = spillRecord.getIndex(i);
    if (skippedPartitions != null && skippedPartitions.contains(i)) {
      assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData());
      continue;
    }
    FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
    inStream.seek(indexRecord.getStartOffset());
    IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false, 0, -1);
    while (reader.nextRawKey(keyBuffer)) {
      reader.nextRawValue(valBuffer);
      keyDeser.readFields(keyBuffer);
      valDeser.readFields(valBuffer);
      int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
      assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
    }
    inStream.close();
  }
  for (int i = 0; i < numOutputs; i++) {
    assertEquals(0, expectedValues.get(i).size());
    expectedValues.remove(i);
  }
  assertEquals(0, expectedValues.size());
  verify(outputContext, atLeast(1)).notifyProgress();
}
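The sizePerRecord constant in baseTest above (4 + 8) reflects the fixed serialized widths of IntWritable and LongWritable. The following standalone sketch, independent of the writer under test and with an illustrative class name, checks those widths directly:

import java.io.IOException;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;

public class WritableSizeCheck {
  public static void main(String[] args) throws IOException {
    // Serialize one IntWritable and one LongWritable into separate buffers.
    DataOutputBuffer intOut = new DataOutputBuffer();
    new IntWritable(42).write(intOut);

    DataOutputBuffer longOut = new DataOutputBuffer();
    new LongWritable(42L).write(longOut);

    // IntWritable serializes to 4 bytes, LongWritable to 8 bytes.
    System.out.println(intOut.getLength() + " + " + longOut.getLength()); // 4 + 8
  }
}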