
Example 46 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project tez by apache.

In the class UnorderedPartitionedKVWriter, method mergeAll.

private void mergeAll() throws IOException {
    long expectedSize = spilledSize;
    if (currentBuffer.nextPosition != 0) {
        expectedSize += currentBuffer.nextPosition - (currentBuffer.numRecords * META_SIZE) - currentBuffer.skipSize + numPartitions * APPROX_HEADER_LENGTH;
        // Update final statistics.
        updateGlobalStats(currentBuffer);
    }
    SpillPathDetails spillPathDetails = getSpillPathDetails(true, expectedSize);
    finalIndexPath = spillPathDetails.indexFilePath;
    finalOutPath = spillPathDetails.outputFilePath;
    TezSpillRecord finalSpillRecord = new TezSpillRecord(numPartitions);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    DataInputBuffer keyBufferIFile = new DataInputBuffer();
    DataInputBuffer valBufferIFile = new DataInputBuffer();
    FSDataOutputStream out = null;
    try {
        out = rfs.create(finalOutPath);
        if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(conf)))) {
            rfs.setPermission(finalOutPath, SPILL_FILE_PERMS);
        }
        Writer writer = null;
        for (int i = 0; i < numPartitions; i++) {
            long segmentStart = out.getPos();
            if (numRecordsPerPartition[i] == 0) {
                LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records");
                continue;
            }
            writer = new Writer(conf, out, keyClass, valClass, codec, null, null);
            try {
                if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) {
                    // Write current buffer.
                    writePartition(currentBuffer.partitionPositions[i], currentBuffer, writer, keyBuffer, valBuffer);
                }
                synchronized (spillInfoList) {
                    for (SpillInfo spillInfo : spillInfoList) {
                        TezIndexRecord indexRecord = spillInfo.spillRecord.getIndex(i);
                        if (indexRecord.getPartLength() == 0) {
                            // Skip empty partitions within a spill
                            continue;
                        }
                        FSDataInputStream in = rfs.open(spillInfo.outPath);
                        in.seek(indexRecord.getStartOffset());
                        IFile.Reader reader = new IFile.Reader(in, indexRecord.getPartLength(), codec, null, additionalSpillBytesReadCounter, ifileReadAhead, ifileReadAheadLength, ifileBufferSize);
                        while (reader.nextRawKey(keyBufferIFile)) {
                            // TODO Inefficient. If spills are not compressed, a direct copy should be possible
                            // given the current IFile format. Also extremely inefficient for large records,
                            // since the entire record will be read into memory.
                            reader.nextRawValue(valBufferIFile);
                            writer.append(keyBufferIFile, valBufferIFile);
                        }
                        reader.close();
                    }
                }
                writer.close();
                fileOutputBytesCounter.increment(writer.getCompressedLength());
                TezIndexRecord indexRecord = new TezIndexRecord(segmentStart, writer.getRawLength(), writer.getCompressedLength());
                writer = null;
                finalSpillRecord.putIndex(indexRecord, i);
                outputContext.notifyProgress();
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
    } finally {
        if (out != null) {
            out.close();
        }
        deleteIntermediateSpills();
    }
    finalSpillRecord.writeToFile(finalIndexPath, conf);
    fileOutputBytesCounter.increment(indexFileSizeEstimate);
    LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills");
}
Also used: IFile (org.apache.tez.runtime.library.common.sort.impl.IFile), TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)
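
The merge loop above allocates its four DataInputBuffers once and reuses them for every record and partition. A minimal standalone sketch of that reset-and-reuse idea, using only hadoop-common classes (Text records stand in for the Tez key/value types, which are not reproduced here):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class BufferReuseSketch {
    public static void main(String[] args) throws IOException {
        // Serialize a few records back to back into one growable buffer,
        // remembering where each record starts.
        DataOutputBuffer out = new DataOutputBuffer();
        int[] starts = new int[3];
        for (int i = 0; i < 3; i++) {
            starts[i] = out.getLength();
            new Text("record-" + i).write(out);
        }
        // A single DataInputBuffer is reset over each record's byte range in
        // turn, so no per-record buffer allocation is needed.
        DataInputBuffer in = new DataInputBuffer();
        Text record = new Text();
        for (int i = 0; i < starts.length; i++) {
            int end = (i == starts.length - 1) ? out.getLength() : starts[i + 1];
            in.reset(out.getData(), starts[i], end - starts[i]);
            record.readFields(in);
            System.out.println(record);
        }
    }
}

Because reset() only repoints the buffer at an existing byte range, the same keyBuffer/valBuffer pair can serve every partition and spill in mergeAll() without fresh allocations.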

Example 47 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project tez by apache.

In the class ValuesIterator, method readNextValue.

/**
 * Read the next value
 * @throws IOException
 */
private void readNextValue() throws IOException {
    DataInputBuffer nextValueBytes = in.getValue();
    valueIn.reset(nextValueBytes.getData(), nextValueBytes.getPosition(), nextValueBytes.getLength() - nextValueBytes.getPosition());
    value = valDeserializer.deserialize(value);
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)
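
A self-contained sketch of the same reset-then-deserialize pattern, assuming a Text value type and Hadoop's standard SerializationFactory; the valueIn buffer and valDeserializer fields in the real ValuesIterator are set up elsewhere in that class and are only approximated here:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;

import java.io.IOException;

public class ReadNextValueSketch {
    public static void main(String[] args) throws IOException {
        // Stand-in for the raw value bytes the iterator would hand back.
        DataOutputBuffer rawValue = new DataOutputBuffer();
        new Text("hello").write(rawValue);

        // The deserializer is opened once over a reusable DataInputBuffer.
        Configuration conf = new Configuration();
        SerializationFactory factory = new SerializationFactory(conf);
        Deserializer<Text> valDeserializer = factory.getDeserializer(Text.class);
        DataInputBuffer valueIn = new DataInputBuffer();
        valDeserializer.open(valueIn);

        // readNextValue(): repoint the buffer at the raw bytes and deserialize.
        valueIn.reset(rawValue.getData(), 0, rawValue.getLength());
        Text value = valDeserializer.deserialize(new Text());
        System.out.println(value);
        valDeserializer.close();
    }
}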

Example 48 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project gora by apache.

In the class TestIOUtils, method testNullFieldsWith.

private void testNullFieldsWith(Object... values) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();
    IOUtils.writeNullFieldsInfo(out, values);
    in.reset(out.getData(), out.getLength());
    boolean[] ret = IOUtils.readNullFieldsInfo(in);
    // assert
    assertEquals(values.length, ret.length);
    for (int i = 0; i < values.length; i++) {
        assertEquals(values[i] == null, ret[i]);
    }
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)
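
The Gora helpers writeNullFieldsInfo and readNullFieldsInfo handle the actual encoding, which may differ from what is shown here; the sketch below only illustrates the DataOutputBuffer-to-DataInputBuffer round trip with one boolean flag per field:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

import java.io.IOException;

public class NullFieldsRoundTripSketch {
    public static void main(String[] args) throws IOException {
        Object[] values = { "a", null, 42, null };

        // Write one flag per field marking whether it is null.
        DataOutputBuffer out = new DataOutputBuffer();
        for (Object v : values) {
            out.writeBoolean(v == null);
        }

        // Read the flags back through a DataInputBuffer reset over the
        // freshly written bytes, exactly as the test does.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        for (Object v : values) {
            boolean wasNull = in.readBoolean();
            System.out.println(wasNull + " (expected " + (v == null) + ")");
        }
    }
}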

Example 49 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hbase by apache.

In the class TestHBaseSaslRpcClient, method testFailedEvaluateResponse.

@Test(expected = IOException.class)
public void testFailedEvaluateResponse() throws IOException {
    // prep mocking the SaslClient
    SimpleSaslClientAuthenticationProvider mockProvider = Mockito.mock(SimpleSaslClientAuthenticationProvider.class);
    SaslClient mockClient = Mockito.mock(SaslClient.class);
    Assert.assertNotNull(mockProvider);
    Assert.assertNotNull(mockClient);
    Mockito.when(mockProvider.createClient(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.anyBoolean(), Mockito.any())).thenReturn(mockClient);
    HBaseSaslRpcClient rpcClient = new HBaseSaslRpcClient(HBaseConfiguration.create(), mockProvider, createTokenMock(), Mockito.mock(InetAddress.class), Mockito.mock(SecurityInfo.class), false);
    // simulate getting an error from a failed saslServer.evaluateResponse
    DataOutputBuffer errorBuffer = new DataOutputBuffer();
    errorBuffer.writeInt(SaslStatus.ERROR.state);
    WritableUtils.writeString(errorBuffer, IOException.class.getName());
    WritableUtils.writeString(errorBuffer, "Invalid Token");
    DataInputBuffer in = new DataInputBuffer();
    in.reset(errorBuffer.getData(), 0, errorBuffer.getLength());
    DataOutputBuffer out = new DataOutputBuffer();
    // simulate that authentication exchange has completed quickly after sending the token
    Mockito.when(mockClient.isComplete()).thenReturn(true);
    rpcClient.saslConnect(in, out);
}
Also used: SimpleSaslClientAuthenticationProvider (org.apache.hadoop.hbase.security.provider.SimpleSaslClientAuthenticationProvider), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), IOException (java.io.IOException), InetAddress (java.net.InetAddress), SaslClient (javax.security.sasl.SaslClient), Test (org.junit.Test)
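
A standalone sketch of the error frame the test fabricates, using only hadoop-common classes; the numeric status value below is a stand-in, since SaslStatus.ERROR.state is an HBase-internal constant:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.WritableUtils;

import java.io.IOException;

public class SaslErrorFrameSketch {
    // Stand-in value; the real SaslStatus.ERROR.state is an HBase-internal constant.
    private static final int ERROR_STATE = 1;

    public static void main(String[] args) throws IOException {
        // Build the same kind of error frame the test fabricates: a status int
        // followed by the exception class name and the error message.
        DataOutputBuffer errorBuffer = new DataOutputBuffer();
        errorBuffer.writeInt(ERROR_STATE);
        WritableUtils.writeString(errorBuffer, IOException.class.getName());
        WritableUtils.writeString(errorBuffer, "Invalid Token");

        // Read it back through a DataInputBuffer, which is how the frame is
        // handed to rpcClient.saslConnect(in, out) in the test.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(errorBuffer.getData(), 0, errorBuffer.getLength());
        int status = in.readInt();
        String exceptionClass = WritableUtils.readString(in);
        String message = WritableUtils.readString(in);
        System.out.println(status + " " + exceptionClass + ": " + message);
    }
}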

Example 50 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hbase by apache.

In the class RegionInfo, method parseDelimitedFrom.

/**
 * Parses all the RegionInfo instances from the passed-in stream until EOF. Presumes the
 * RegionInfos were serialized to the stream with toDelimitedByteArray().
 * @param bytes serialized bytes
 * @param offset the start offset into the byte[] buffer
 * @param length how far we should read into the byte[] buffer
 * @return All the RegionInfos that are in the byte array. Keeps reading till we hit the end.
 */
static List<RegionInfo> parseDelimitedFrom(final byte[] bytes, final int offset, final int length) throws IOException {
    if (bytes == null) {
        throw new IllegalArgumentException("Can't build an object with empty bytes array");
    }
    List<RegionInfo> ris = new ArrayList<>();
    try (DataInputBuffer in = new DataInputBuffer()) {
        in.reset(bytes, offset, length);
        while (in.available() > 0) {
            RegionInfo ri = parseFrom(in);
            ris.add(ri);
        }
    }
    return ris;
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), ArrayList (java.util.ArrayList)
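
The same read-until-exhausted loop, sketched with plain Text records; toDelimitedByteArray() and parseFrom() are HBase-specific, so Text.write/readFields stand in for them here:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class ParseDelimitedSketch {
    public static void main(String[] args) throws IOException {
        // Serialize several records back to back, the way a delimited
        // byte array of RegionInfos would be laid out.
        DataOutputBuffer out = new DataOutputBuffer();
        for (int i = 0; i < 3; i++) {
            new Text("record-" + i).write(out);
        }

        // Keep parsing until the buffer is exhausted; try-with-resources works
        // because DataInputBuffer is an InputStream.
        List<Text> records = new ArrayList<>();
        try (DataInputBuffer in = new DataInputBuffer()) {
            in.reset(out.getData(), 0, out.getLength());
            while (in.available() > 0) {
                Text record = new Text();
                record.readFields(in);
                records.add(record);
            }
        }
        System.out.println(records);
    }
}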

Aggregations

DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 112
Test (org.junit.Test): 49
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 45
IOException (java.io.IOException): 24
Text (org.apache.hadoop.io.Text): 20
Path (org.apache.hadoop.fs.Path): 16
Configuration (org.apache.hadoop.conf.Configuration): 13
IntWritable (org.apache.hadoop.io.IntWritable): 11
Random (java.util.Random): 10
DataInputStream (java.io.DataInputStream): 9
BufferedInputStream (java.io.BufferedInputStream): 8
HashMap (java.util.HashMap): 8
DataOutputStream (java.io.DataOutputStream): 6
LongWritable (org.apache.hadoop.io.LongWritable): 6
SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory): 6
IFile (org.apache.tez.runtime.library.common.sort.impl.IFile): 6
BufferedOutputStream (java.io.BufferedOutputStream): 5
BytesWritable (org.apache.hadoop.io.BytesWritable): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
Credentials (org.apache.hadoop.security.Credentials): 4