
Example 91 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in the Apache Tez project.

From the class TezCommonUtils, the method parseCredentialsBytes:

public static Credentials parseCredentialsBytes(byte[] credentialsBytes) throws IOException {
    Credentials credentials = new Credentials();
    DataInputBuffer dib = new DataInputBuffer();
    try {
        // Point the reusable input buffer at the serialized credentials.
        dib.reset(credentialsBytes, credentialsBytes.length);
        // Populate the Credentials object from Hadoop's token storage format.
        credentials.readTokenStorageStream(dib);
        return credentials;
    } finally {
        dib.close();
    }
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), Credentials (org.apache.hadoop.security.Credentials)
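For context, here is a minimal round-trip sketch (our own illustration, not from the Tez sources) showing how such a byte array is typically produced: Credentials writes Hadoop's token storage format into a DataOutputBuffer, and the trimmed backing array is what parseCredentialsBytes consumes. It assumes imports of java.util.Arrays and org.apache.hadoop.io.DataOutputBuffer.

Credentials original = new Credentials();
DataOutputBuffer dob = new DataOutputBuffer();
// Write the token storage format into an in-memory buffer.
original.writeTokenStorageToStream(dob);
// getData() returns the whole backing array; copy only the valid prefix.
byte[] bytes = Arrays.copyOf(dob.getData(), dob.getLength());
Credentials parsed = TezCommonUtils.parseCredentialsBytes(bytes);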

Example 92 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in the Apache Tez project.

From the class TestTezMerger, the method verifyData:

private void verifyData(TezRawKeyValueIterator records) throws IOException {
    // Verify the merged data is correct
    Map<Integer, Integer> dataMap = Maps.newHashMap();
    int pk = -1;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();
        IntWritable k = new IntWritable();
        k.readFields(key);
        LongWritable v = new LongWritable();
        v.readFields(value);
        if (records.isSameKey()) {
            LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get());
            // More than one key should be present in the source data
            assertTrue(verificationDataSet.get(k.get()).size() > 1);
            // Ensure this is same as the previous key we saw
            assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get());
        } else {
            LOG.info("key=" + k.get() + ", val=" + v.get());
        }
        pk = k.get();
        int keyCount = (dataMap.containsKey(k.get())) ? (dataMap.get(k.get()) + 1) : 1;
        dataMap.put(k.get(), keyCount);
    }
    // Verify if the number of distinct entries is the same in source and the test
    assertTrue("dataMap=" + dataMap.keySet().size() + ", verificationSet=" + verificationDataSet.keySet().size(), dataMap.keySet().size() == verificationDataSet.keySet().size());
    // Verify with source data
    for (Integer key : verificationDataSet.keySet()) {
        assertTrue("Data size for " + key + " not matching with source; dataSize:" + dataMap.get(key).intValue() + ", source:" + verificationDataSet.get(key).size(), dataMap.get(key).intValue() == verificationDataSet.get(key).size());
    }
    // Verify if every key has the same number of repeated items in the source dataset as well
    for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) {
        assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry.getValue());
    }
    LOG.info("******************");
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), LongWritable (org.apache.hadoop.io.LongWritable), Map (java.util.Map), IntWritable (org.apache.hadoop.io.IntWritable)
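The reset-then-readFields sequence above is the core DataInputBuffer idiom: the merger refills one buffer per record, and each Writable pulls its bytes from it. A self-contained sketch of that idiom (our own illustration, not part of the test) serializes a pair with DataOutputBuffer and reads it back:

DataOutputBuffer out = new DataOutputBuffer();
// Serialize one key/value pair, as the merger's inputs would be.
new IntWritable(7).write(out);
new LongWritable(100L).write(out);
DataInputBuffer in = new DataInputBuffer();
// reset() points the buffer at the serialized bytes without copying them.
in.reset(out.getData(), out.getLength());
IntWritable k = new IntWritable();
LongWritable v = new LongWritable();
k.readFields(in);
v.readFields(in);
// k.get() == 7 and v.get() == 100L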

Example 93 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in the Apache Tez project.

From the class TestTezMerger, the method verify:

/**
 * Verify that the records match the expected data set.
 *
 * @param records        iterator over the merged key/value records
 * @param expectedResult rows of {key, marker} pairs, where the marker is
 *                       SAME_KEY or DIFF_KEY
 * @throws IOException if reading from the iterator fails
 */
private void verify(TezRawKeyValueIterator records, String[][] expectedResult) throws IOException {
    // Iterate through merged dataset (shouldn't throw any exceptions)
    int i = 0;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();
        Text k = new Text();
        k.readFields(key);
        Text v = new Text();
        v.readFields(value);
    assertEquals(expectedResult[i][0], k.toString());
        String correctResult = expectedResult[i][1];
        if (records.isSameKey()) {
            assertTrue("Expected " + correctResult, correctResult.equalsIgnoreCase(SAME_KEY));
            LOG.info("\tSame Key : key=" + k + ", val=" + v);
        } else {
            assertTrue("Expected " + correctResult, correctResult.equalsIgnoreCase(DIFF_KEY));
            LOG.info("key=" + k + ", val=" + v);
        }
        i++;
    }
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), Text (org.apache.hadoop.io.Text)
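A hypothetical expectedResult table (illustrative data, not from the test) makes the contract concrete: each row pairs the expected key with one of the test class's marker constants, stating whether isSameKey() should report a repeat of the previous key.

String[][] expectedResult = {
    // key,     expected isSameKey() marker
    { "apple",  DIFF_KEY },
    { "apple",  SAME_KEY },
    { "banana", DIFF_KEY }
};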

Example 94 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in the Apache Tez project.

From the class TestIFile, the method testExceedMaxSize:

// test overflow
@Test(timeout = 5000)
public void testExceedMaxSize() throws IOException {
    final int oldMaxBufferSize = IFile.Reader.MAX_BUFFER_SIZE;
    Text shortString = new Text("string");
    Text longString = new Text("A string of length 22.");
    assertEquals(22, longString.getLength());
    Text readKey = new Text();
    Text readValue = new Text();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    IFile.Writer writer;
    IFile.Reader reader;
    FSDataOutputStream out;
    // Check Key length exceeding MAX_BUFFER_SIZE
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
    writer.append(longString, shortString);
    writer.close();
    out.close();
    // Set this to a smaller value for testing
    IFile.Reader.MAX_BUFFER_SIZE = 16;
    reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
    try {
        reader.nextRawKey(keyIn);
        Assert.fail("Expected IllegalArgumentException to be thrown");
    } catch (IllegalArgumentException e) {
    // test passed
    }
    reader.close();
    // Check Value length exceeding MAX_BUFFER_SIZE
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
    writer.append(shortString, longString);
    writer.close();
    out.close();
    // Set this to a smaller value for testing
    IFile.Reader.MAX_BUFFER_SIZE = 16;
    reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
    try {
        reader.nextRawKey(keyIn);
        reader.nextRawValue(valIn);
        Assert.fail("Expected IllegalArgumentException to be thrown");
    } catch (IllegalArgumentException e) {
    // test passed
    }
    reader.close();
    // Check Key length not getting doubled
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
    writer.append(longString, shortString);
    writer.close();
    out.close();
    // Set this to a smaller value for testing
    IFile.Reader.MAX_BUFFER_SIZE = 32;
    reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
    reader.nextRawKey(keyIn);
    assertEquals(longString.getLength() + 1, keyIn.getData().length);
    reader.close();
    // Check Value length not getting doubled
    out = localFs.create(outputPath);
    writer = new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, false);
    writer.append(shortString, longString);
    writer.close();
    out.close();
    // Set this to a smaller value for testing
    IFile.Reader.MAX_BUFFER_SIZE = 32;
    reader = new IFile.Reader(localFs, outputPath, null, null, null, false, 0, -1);
    reader.nextRawKey(keyIn);
    reader.nextRawValue(valIn);
    assertEquals(longString.getLength() + 1, valIn.getData().length);
    reader.close();
    // revert back to original value
    IFile.Reader.MAX_BUFFER_SIZE = oldMaxBufferSize;
}
Also used: Reader (org.apache.tez.runtime.library.common.sort.impl.IFile.Reader), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), Text (org.apache.hadoop.io.Text), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer), Test (org.junit.Test)
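One caveat worth noting: the test mutates the global IFile.Reader.MAX_BUFFER_SIZE and only restores it on the last line, so a failing assertion midway would leak the shrunken value into later tests. A try/finally guard (our sketch, not part of the original test) makes the restore unconditional:

final int saved = IFile.Reader.MAX_BUFFER_SIZE;
try {
    // Shrink the limit so the 22-byte key overflows it.
    IFile.Reader.MAX_BUFFER_SIZE = 16;
    // ... write with IFile.Writer, read back, and assert as above ...
} finally {
    // Runs even if an assertion fails, restoring the global limit.
    IFile.Reader.MAX_BUFFER_SIZE = saved;
}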

Example 95 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in the Apache Tez project.

From the class TestIFile, the method verifyData:

/**
 * Verify the reader's records against the expected data.
 *
 * @param reader the IFile reader positioned at the start of its data
 * @param data   the expected key/value pairs, in write order
 * @throws IOException if reading or deserialization fails
 */
private void verifyData(Reader reader, List<KVPair> data) throws IOException {
    LOG.info("Data verification");
    Text readKey = new Text();
    IntWritable readValue = new IntWritable();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    Deserializer<Text> keyDeserializer;
    Deserializer<IntWritable> valDeserializer;
    SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
    keyDeserializer = serializationFactory.getDeserializer(Text.class);
    valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);
    int numRecordsRead = 0;
    while (reader.nextRawKey(keyIn)) {
        reader.nextRawValue(valIn);
        readKey = keyDeserializer.deserialize(readKey);
        readValue = valDeserializer.deserialize(readValue);
        KVPair expected = data.get(numRecordsRead);
        assertEquals("Key does not match: Expected: " + expected.getKey() + ", Read: " + readKey, expected.getKey(), readKey);
        assertEquals("Value does not match: Expected: " + expected.getvalue() + ", Read: " + readValue, expected.getvalue(), readValue);
        numRecordsRead++;
    }
    assertEquals("Expected: " + data.size() + " records, but found: " + numRecordsRead, data.size(), numRecordsRead);
    LOG.info("Found: " + numRecordsRead + " records");
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), KVPair (org.apache.tez.runtime.library.testutils.KVDataGen.KVPair), SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory), Text (org.apache.hadoop.io.Text), IntWritable (org.apache.hadoop.io.IntWritable)
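The open-once, deserialize-many pattern above is the key detail: the deserializer is bound to the DataInputBuffer a single time, and every nextRawKey/nextRawValue call refills that same buffer. A self-contained sketch of the binding (our own illustration, assuming a default Hadoop Configuration):

// Serialize one Text value so the buffer has something to read.
DataOutputBuffer out = new DataOutputBuffer();
new Text("hello").write(out);
DataInputBuffer keyIn = new DataInputBuffer();
keyIn.reset(out.getData(), out.getLength());
// Bind the deserializer to the buffer once; reuse it for every record.
SerializationFactory factory = new SerializationFactory(new Configuration());
Deserializer<Text> deserializer = factory.getDeserializer(Text.class);
deserializer.open(keyIn);
Text readKey = deserializer.deserialize(new Text());
// readKey.toString().equals("hello")
deserializer.close();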

Aggregations

DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 112
Test (org.junit.Test): 49
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 45
IOException (java.io.IOException): 24
Text (org.apache.hadoop.io.Text): 20
Path (org.apache.hadoop.fs.Path): 16
Configuration (org.apache.hadoop.conf.Configuration): 13
IntWritable (org.apache.hadoop.io.IntWritable): 11
Random (java.util.Random): 10
DataInputStream (java.io.DataInputStream): 9
BufferedInputStream (java.io.BufferedInputStream): 8
HashMap (java.util.HashMap): 8
DataOutputStream (java.io.DataOutputStream): 6
LongWritable (org.apache.hadoop.io.LongWritable): 6
SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory): 6
IFile (org.apache.tez.runtime.library.common.sort.impl.IFile): 6
BufferedOutputStream (java.io.BufferedOutputStream): 5
BytesWritable (org.apache.hadoop.io.BytesWritable): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
Credentials (org.apache.hadoop.security.Credentials): 4