
Example 31 with Writer

Use of java.io.Writer in project hadoop by apache.

The class TestLineRecordReader, method createInputFile:

/**
   * Writes the given test data to an input file.
   *
   * @param conf the configuration used to obtain the local file system
   * @param data the content to write into the file
   * @return Path of the file created
   * @throws IOException if the file cannot be created or written
   */
private Path createInputFile(Configuration conf, String data) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path(inputDir, "test.txt");
    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        writer.write(data);
    } finally {
        writer.close();
    }
    return file;
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer)
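
On Java 7 and later, the same write-then-close pattern is usually written with try-with-resources, which closes the Writer even if write throws. A minimal sketch of the method above in that style (inputDir is the field assumed from the surrounding test class):

private Path createInputFile(Configuration conf, String data) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path(inputDir, "test.txt");
    // try-with-resources replaces the explicit finally { writer.close(); }
    try (Writer writer = new OutputStreamWriter(localFs.create(file))) {
        writer.write(data);
    }
    return file;
}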

Example 32 with Writer

Use of java.io.Writer in project hadoop by apache.

The class TestNoDefaultsJobConf, method testNoDefaults:

@Test
public void testNoDefaults() throws Exception {
    JobConf configuration = new JobConf();
    // A JobConf created with defaults loads the default resources, so hadoop.tmp.dir is set
    assertTrue(configuration.get("hadoop.tmp.dir", null) != null);
    configuration = new JobConf(false);
    // With loadDefaults=false, no default resources are read and the property is absent
    assertTrue(configuration.get("hadoop.tmp.dir", null) == null);
    Path inDir = new Path("testing/jobconf/input");
    Path outDir = new Path("testing/jobconf/output");
    OutputStream os = getFileSystem().create(new Path(inDir, "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello\n");
    wr.write("hello\n");
    wr.close();
    JobConf conf = new JobConf(false);
    conf.set("fs.defaultFS", createJobConf().get("fs.defaultFS"));
    conf.setJobName("mr");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient.runJob(conf);
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    if (outputFiles.length > 0) {
        InputStream is = getFileSystem().open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();
        int counter = 0;
        while (line != null) {
            counter++;
            assertTrue(line.contains("hello"));
            line = reader.readLine();
        }
        reader.close();
        assertEquals(2, counter);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), InputStreamReader (java.io.InputStreamReader), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), Utils (org.apache.hadoop.mapred.Utils), BufferedReader (java.io.BufferedReader), OutputStreamWriter (java.io.OutputStreamWriter), JobConf (org.apache.hadoop.mapred.JobConf), Writer (java.io.Writer), Test (org.junit.Test)
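
Note that new OutputStreamWriter(os) encodes with the platform default charset. Test fixtures usually want a fixed encoding; a sketch of the same two-line write with UTF-8 pinned explicitly (getFileSystem() and inDir as in the test above):

import java.nio.charset.StandardCharsets;

OutputStream os = getFileSystem().create(new Path(inDir, "text.txt"));
// Pinning the charset makes the fixture independent of the JVM's file.encoding
try (Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8)) {
    wr.write("hello\n");
    wr.write("hello\n");
}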

Example 33 with Writer

Use of java.io.Writer in project hadoop by apache.

The class TestUserDefinedCounters, method cleanAndCreateInput:

private void cleanAndCreateInput(FileSystem fs) throws IOException {
    // Remove any input and output left over from a previous run
    fs.delete(INPUT_DIR, true);
    fs.delete(OUTPUT_DIR, true);
    OutputStream os = fs.create(INPUT_FILE);
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();
}
Also used: OutputStream (java.io.OutputStream), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer)
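
With only four writes, buffering hardly matters, but the same fixture with many small writes would normally wrap the stream writer in a BufferedWriter so each call does not hit the stream directly. A sketch using the names from the method above (add import java.io.BufferedWriter):

// BufferedWriter batches the small writes; try-with-resources flushes and closes it
try (Writer wr = new BufferedWriter(new OutputStreamWriter(fs.create(INPUT_FILE)))) {
    for (int i = 1; i <= 4; i++) {
        wr.write("hello" + i + "\n");
    }
}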

Example 34 with Writer

Use of java.io.Writer in project hadoop by apache.

The class TestTextInputFormat, method testFormat:

@Test(timeout = 500000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "test.txt");
    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.debug("creating; entries = " + length);
        // create a file with length entries
        Writer writer = new OutputStreamWriter(localFs.create(file));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 20) + 1;
            LOG.debug("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            LOG.debug("splitting: got =        " + splits.length);
            if (length == 0) {
                assertEquals("Files of length 0 are not returned from FileInputFormat.getSplits().", 1, splits.length);
                assertEquals("Empty file length == 0", 0, splits[0].getLength());
            }
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                LOG.debug("split[" + j + "]= " + splits[j]);
                RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        int v = Integer.parseInt(value.toString());
                        LOG.debug("read " + v);
                        if (bits.get(v)) {
                            LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());
                        }
                        assertFalse("Key in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), BitSet (java.util.BitSet), Text (org.apache.hadoop.io.Text), Random (java.util.Random), OutputStreamWriter (java.io.OutputStreamWriter), LongWritable (org.apache.hadoop.io.LongWritable), Writer (java.io.Writer), Test (org.junit.Test)
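
The heart of the verification above is a BitSet used as a seen-set: bits.get(v) catches a record returned by two different splits, and the final bits.cardinality() check catches records returned by none. A self-contained illustration of that bookkeeping with hypothetical record ids (not part of the Hadoop test):

import java.util.BitSet;

public class SplitCoverageCheck {
    public static void main(String[] args) {
        // Records read from three hypothetical splits; together they must cover 0..9 exactly once
        int[][] splits = { { 0, 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } };
        BitSet bits = new BitSet(10);
        for (int[] split : splits) {
            for (int v : split) {
                if (bits.get(v)) {
                    throw new AssertionError("record " + v + " appeared in two splits");
                }
                bits.set(v);
            }
        }
        if (bits.cardinality() != 10) {
            throw new AssertionError("some record was never read");
        }
        System.out.println("every record read exactly once");
    }
}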

Example 35 with Writer

Use of java.io.Writer in project hadoop by apache.

The class TestAutoInputFormat, method testFormat:

@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testFormat() throws IOException {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path txtFile = new Path(dir, "auto.txt");
    Path seqFile = new Path(dir, "auto.seq");
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // Plain-text input: AutoInputFormat should route these records through TextInputFormat
    Writer txtWriter = new OutputStreamWriter(fs.create(txtFile));
    try {
        for (int i = 0; i < LINES_COUNT; i++) {
            txtWriter.write("" + (10 * i));
            txtWriter.write("\n");
        }
    } finally {
        txtWriter.close();
    }
    // Binary input: a SequenceFile of IntWritable keys and LongWritable values
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, seqFile, IntWritable.class, LongWritable.class);
    try {
        for (int i = 0; i < RECORDS_COUNT; i++) {
            IntWritable key = new IntWritable(11 * i);
            LongWritable value = new LongWritable(12 * i);
            seqWriter.append(key, value);
        }
    } finally {
        seqWriter.close();
    }
    AutoInputFormat format = new AutoInputFormat();
    InputSplit[] splits = format.getSplits(job, SPLITS_COUNT);
    for (InputSplit split : splits) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        try {
            while (reader.next(key, value)) {
                if (key instanceof LongWritable) {
                    assertEquals("Wrong value class.", Text.class, value.getClass());
                    assertTrue("Invalid value", Integer.parseInt(((Text) value).toString()) % 10 == 0);
                } else {
                    assertEquals("Wrong key class.", IntWritable.class, key.getClass());
                    assertEquals("Wrong value class.", LongWritable.class, value.getClass());
                    assertTrue("Invalid key.", ((IntWritable) key).get() % 11 == 0);
                    assertTrue("Invalid value.", ((LongWritable) value).get() % 12 == 0);
                }
            }
        } finally {
            reader.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), AutoInputFormat (org.apache.hadoop.streaming.AutoInputFormat), RecordReader (org.apache.hadoop.mapred.RecordReader), Text (org.apache.hadoop.io.Text), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStreamWriter (java.io.OutputStreamWriter), LongWritable (org.apache.hadoop.io.LongWritable), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), Writer (java.io.Writer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
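
The @SuppressWarnings({ "unchecked", "deprecation" }) above covers, among other things, the SequenceFile.createWriter(fs, conf, path, keyClass, valueClass) overload, which later Hadoop releases deprecate in favor of an option-based factory. A sketch of the SequenceFile step in that newer style, reusing conf, seqFile, and RECORDS_COUNT from the test above:

// Option-based factory (Hadoop 2+); Writer here is org.apache.hadoop.io.SequenceFile.Writer
SequenceFile.Writer seqWriter = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(seqFile),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(LongWritable.class));
try {
    for (int i = 0; i < RECORDS_COUNT; i++) {
        seqWriter.append(new IntWritable(11 * i), new LongWritable(12 * i));
    }
} finally {
    seqWriter.close();
}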

Aggregations

Writer (java.io.Writer): 1259
OutputStreamWriter (java.io.OutputStreamWriter): 512
IOException (java.io.IOException): 414
StringWriter (java.io.StringWriter): 300
File (java.io.File): 269
FileOutputStream (java.io.FileOutputStream): 196
BufferedWriter (java.io.BufferedWriter): 178
FileWriter (java.io.FileWriter): 174
PrintWriter (java.io.PrintWriter): 159
OutputStream (java.io.OutputStream): 120
Test (org.junit.Test): 109
InputStreamReader (java.io.InputStreamReader): 71
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 64
BufferedReader (java.io.BufferedReader): 62
Reader (java.io.Reader): 62
HashMap (java.util.HashMap): 59
Map (java.util.Map): 59
ArrayList (java.util.ArrayList): 58
InputStream (java.io.InputStream): 54
Properties (java.util.Properties): 39