
Example 26 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project: class DistributedFSCheck, method listSubtree.

private void listSubtree(FileStatus rootStatus, SequenceFile.Writer writer) throws IOException {
    Path rootFile = rootStatus.getPath();
    if (rootStatus.isFile()) {
        nrFiles++;
        // For a regular file generate <fName,offset> pairs
        long blockSize = fs.getDefaultBlockSize(rootFile);
        long fileLength = rootStatus.getLen();
        for (long offset = 0; offset < fileLength; offset += blockSize) {
            writer.append(new Text(rootFile.toString()), new LongWritable(offset));
        }
        return;
    }
    FileStatus[] children = null;
    try {
        children = fs.listStatus(rootFile);
    } catch (FileNotFoundException fnfe) {
        throw new IOException("Could not get listing for " + rootFile);
    }
    for (FileStatus child : children) {
        listSubtree(child, writer);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException)
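
A companion sketch, not part of the Hadoop source: the <fileName, offset> pairs that listSubtree appends can be read back with the option-based SequenceFile.Reader. The class name ControlFileDump and the argument handling are our own.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical helper: dumps the <fileName, offset> pairs written by listSubtree.
public class ControlFileDump {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path controlFile = new Path(args[0]);
        Text fileName = new Text();
        LongWritable offset = new LongWritable();
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(controlFile))) {
            // next() refills the reusable key/value instances until EOF
            while (reader.next(fileName, offset)) {
                System.out.println(fileName + " @ " + offset.get());
            }
        }
    }
}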

Example 27 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project: class TestDFSIO, method createControlFile.

@SuppressWarnings("deprecation")
private void createControlFile(FileSystem fs, long nrBytes /* in bytes */, int nrFiles) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");
    Path controlDir = getControlDir(config);
    fs.delete(controlDir, true);
    for (int i = 0; i < nrFiles; i++) {
        String name = getFileName(i);
        Path controlFile = new Path(controlDir, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, config, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
            writer.append(new Text(name), new LongWritable(nrBytes));
        } catch (Exception e) {
            throw new IOException(e); // keep the original exception as the cause
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }
    LOG.info("created control files for: " + nrFiles + " files");
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException)
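
The @SuppressWarnings("deprecation") above is needed because this createWriter overload is deprecated. A minimal sketch of the same write against the non-deprecated, option-based API; the class and method names are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;

// Hypothetical helper: writes one control-file entry via the option-based createWriter.
public class ControlFileWriter {
    static void writeEntry(Configuration conf, Path controlFile, String name, long nrBytes) throws IOException {
        // try-with-resources replaces the manual null-check-and-close finally block
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                Writer.file(controlFile),
                Writer.keyClass(Text.class),
                Writer.valueClass(LongWritable.class),
                Writer.compression(CompressionType.NONE))) {
            writer.append(new Text(name), new LongWritable(nrBytes));
        }
    }
}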

Example 28 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project: class TestSequenceFileAsTextInputFormat, method testFormat.

@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    // the seed is kept so a failing run can be reproduced
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class<?> readerClass = reader.getClass();
                assertEquals("Wrong RecordReader class.", SequenceFileAsTextRecordReader.class, readerClass);
                Text value = reader.createValue();
                Text key = reader.createKey();
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
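
The property under test: whatever key/value classes the sequence file was written with (IntWritable/LongWritable here), SequenceFileAsTextInputFormat hands readers Text/Text pairs converted with toString(). A minimal sketch of wiring the format into an old-API job; the class name is a placeholder.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileAsTextInputFormat;

// Hypothetical setup: mappers for this job receive Text/Text pairs no matter
// which Writable types the input sequence files actually contain.
public class SeqAsTextJobSetup {
    static JobConf configure(Path input) {
        JobConf job = new JobConf();
        job.setInputFormat(SequenceFileAsTextInputFormat.class);
        FileInputFormat.setInputPaths(job, input);
        return job;
    }
}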

Example 29 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project: class TestTupleWritable, method makeRandomWritables.

private Writable[] makeRandomWritables() {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    return writs;
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable)
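
These random writables are fodder for serialization tests. A minimal sketch of the write()/readFields() round trip every element must survive, using Hadoop's in-memory buffers; the class name RoundTrip is ours.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.LongWritable;

public class RoundTrip {
    public static void main(String[] args) throws IOException {
        // serialize into an in-memory byte buffer
        DataOutputBuffer out = new DataOutputBuffer();
        new LongWritable(42L).write(out);
        // deserialize into a fresh instance
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        LongWritable copy = new LongWritable();
        copy.readFields(in);
        System.out.println(copy.get()); // prints 42
    }
}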

Example 30 with LongWritable

Use of org.apache.hadoop.io.LongWritable in the Apache Hadoop project: class TestTupleWritable, method testNestedIterable.

@Test
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Also used : FloatWritable(org.apache.hadoop.io.FloatWritable) Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
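
makeTuple and verifIter are helpers local to this test; they flip the tuple's "written" bits reflectively, since setWritten is package-private and TupleWritable's iterator visits only written positions. Positional access via get(int) needs none of that. A minimal sketch of a nested tuple, assuming org.apache.hadoop.mapred.join.TupleWritable (the package this test targets); the class name is ours.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.join.TupleWritable;

public class NestedTupleDemo {
    public static void main(String[] args) {
        TupleWritable inner = new TupleWritable(
                new Writable[] { new IntWritable(1), new LongWritable(2L) });
        TupleWritable outer = new TupleWritable(
                new Writable[] { new Text("row"), inner });
        // get(i) works without the written bits; iteration would yield nothing here
        TupleWritable nested = (TupleWritable) outer.get(1);
        System.out.println(((LongWritable) nested.get(1)).get()); // prints 2
    }
}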

Aggregations

Types most frequently used together with LongWritable in the indexed sources, with usage counts:

LongWritable (org.apache.hadoop.io.LongWritable): 445
Text (org.apache.hadoop.io.Text): 220
Test (org.junit.Test): 171
IntWritable (org.apache.hadoop.io.IntWritable): 102
Path (org.apache.hadoop.fs.Path): 99
BytesWritable (org.apache.hadoop.io.BytesWritable): 70
FloatWritable (org.apache.hadoop.io.FloatWritable): 68
Configuration (org.apache.hadoop.conf.Configuration): 62
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 62
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 60
ArrayList (java.util.ArrayList): 59
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 57
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 53
IOException (java.io.IOException): 49
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 48
SequenceFile (org.apache.hadoop.io.SequenceFile): 42
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 40
FileSystem (org.apache.hadoop.fs.FileSystem): 37
JobConf (org.apache.hadoop.mapred.JobConf): 37
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 35