
Example 1 with DumpTypedBytes

Use of org.apache.hadoop.streaming.DumpTypedBytes in project hadoop by apache. DumpTypedBytes reads all files that match a given path pattern and dumps their contents to standard output as typed bytes.

From the class TestDumpTypedBytes, the method testDumping:

@Test
public void testDumping() throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    FileSystem fs = cluster.getFileSystem();
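    // Redirect System.out so the output of DumpTypedBytes can be captured and parsed back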
    PrintStream psBackup = System.out;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    PrintStream psOut = new PrintStream(out);
    System.setOut(psOut);
    DumpTypedBytes dumptb = new DumpTypedBytes(conf);
    try {
        Path root = new Path("/typedbytestest");
        assertTrue(fs.mkdirs(root));
        assertTrue(fs.exists(root));
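        // Write 100 lines, each containing a multiple of 10, to a file under /typedbytestest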
        OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(root, "test.txt")));
        try {
            for (int i = 0; i < 100; i++) {
                writer.write("" + (10 * i) + "\n");
            }
        } finally {
            writer.close();
        }
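        // Run DumpTypedBytes on the directory; it streams (offset, line) pairs to stdout as typed bytes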
        String[] args = new String[1];
        args[0] = "/typedbytestest";
        int ret = dumptb.run(args);
        assertEquals("Return value != 0.", 0, ret);
        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
        TypedBytesInput tbinput = new TypedBytesInput(new DataInputStream(in));
        int counter = 0;
        Object key = tbinput.read();
        while (key != null) {
            // offset
            assertEquals(Long.class, key.getClass());
            Object value = tbinput.read();
            assertEquals(String.class, value.getClass());
            assertTrue("Invalid output.", Integer.parseInt(value.toString()) % 10 == 0);
            counter++;
            key = tbinput.read();
        }
        assertEquals("Wrong number of outputs.", 100, counter);
    } finally {
        try {
            fs.close();
        } catch (Exception e) {
        }
        System.setOut(psBackup);
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path), PrintStream(java.io.PrintStream), MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster), Configuration(org.apache.hadoop.conf.Configuration), TypedBytesInput(org.apache.hadoop.typedbytes.TypedBytesInput), ByteArrayOutputStream(java.io.ByteArrayOutputStream), DataInputStream(java.io.DataInputStream), DumpTypedBytes(org.apache.hadoop.streaming.DumpTypedBytes), ByteArrayInputStream(java.io.ByteArrayInputStream), FileSystem(org.apache.hadoop.fs.FileSystem), OutputStreamWriter(java.io.OutputStreamWriter), Test(org.junit.Test)
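
For context on the typed-bytes framing the assertions rely on, below is a minimal, self-contained round trip over an in-memory buffer using TypedBytesOutput and TypedBytesInput. It is a sketch only: the TypedBytesRoundTrip class is hypothetical and not part of the test above, and it assumes TypedBytesOutput exposes a DataOutput-based constructor and a write(Object) method symmetric to the TypedBytesInput API used in the test. The pairs mirror the (Long offset, String line) records that DumpTypedBytes emits for a plain text file.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.typedbytes.TypedBytesInput;
import org.apache.hadoop.typedbytes.TypedBytesOutput;

public class TypedBytesRoundTrip {
    public static void main(String[] args) throws IOException {
        // Write a few (Long, String) pairs: the same shape DumpTypedBytes
        // produces for a text file (byte offset as key, line as value).
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        TypedBytesOutput tbout = new TypedBytesOutput(new DataOutputStream(buffer));
        long offset = 0;
        for (int i = 0; i < 3; i++) {
            String line = Integer.toString(10 * i);
            tbout.write(Long.valueOf(offset));
            tbout.write(line);
            // +1 accounts for the newline each line carries in the source file
            offset += line.length() + 1;
        }
        // Read the pairs back, just as the test decodes the captured stdout.
        TypedBytesInput tbin = new TypedBytesInput(
            new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        Object key = tbin.read();
        while (key != null) {
            Object value = tbin.read();
            System.out.println(key + "\t" + value);
            key = tbin.read();
        }
    }
}

Each printed line is one key/value pair, which is essentially what the test recovers from the DumpTypedBytes output after redirecting System.out.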
