Example 91 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hive by apache.

The class TestGenericUDFSubstringIndex, method runAndVerify.

private void runAndVerify(String str, String delim, Integer count, String expResult, GenericUDF udf) throws HiveException {
    DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null);
    DeferredObject valueObj1 = new DeferredJavaObject(delim != null ? new Text(delim) : null);
    DeferredObject valueObj2 = new DeferredJavaObject(count != null ? new IntWritable(count) : null);
    DeferredObject[] args = { valueObj0, valueObj1, valueObj2 };
    Text output = (Text) udf.evaluate(args);
    assertEquals("substring_index() test ", expResult, output != null ? output.toString() : null);
}
Also used: DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable)
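
For context, a call to this helper might look like the one below. This is a hedged sketch: the initialization mirrors how such Hive UDF tests are typically set up, and the expected result assumes MySQL-style substring_index semantics.

GenericUDFSubstringIndex udf = new GenericUDFSubstringIndex();
ObjectInspector[] argOIs = {
    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
    PrimitiveObjectInspectorFactory.writableStringObjectInspector,
    PrimitiveObjectInspectorFactory.writableIntObjectInspector };
udf.initialize(argOIs);
// substring_index("www.apache.org", ".", 2) keeps everything before the
// second occurrence of the delimiter.
runAndVerify("www.apache.org", ".", 2, "www.apache", udf);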

Example 92 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class ShuffleSchedulerImpl, method hostFailed.

public synchronized void hostFailed(String hostname) {
    if (hostFailures.containsKey(hostname)) {
        IntWritable x = hostFailures.get(hostname);
        x.set(x.get() + 1);
    } else {
        hostFailures.put(hostname, new IntWritable(1));
    }
}
Also used: IntWritable(org.apache.hadoop.io.IntWritable)
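
The IntWritable here acts as a mutable counter, so repeat failures increment the map entry in place instead of re-boxing an Integer each time. A more compact equivalent of the same pattern (an illustration, not the actual Hadoop source) could use Map.computeIfAbsent:

public synchronized void hostFailed(String hostname) {
    // Insert a zero counter on the first failure, then bump it in place;
    // no re-insertion is needed because the IntWritable is mutated directly.
    IntWritable count = hostFailures.computeIfAbsent(hostname, k -> new IntWritable(0));
    count.set(count.get() + 1);
}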

Example 93 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException {
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.getLocal(job);
    Path dir = new Path(new Path(new Path(System.getProperty("test.build.data", ".")), FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    fs.delete(dir, true);
    if (!fs.mkdirs(dir)) {
        fail("Failed to create output directory");
    }
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    FileOutputFormat.setOutputPath(job, dir.getParent().getParent());
    FileOutputFormat.setWorkOutputPath(job, dir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    RecordWriter<BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat().getRecordWriter(fs, job, file.toString(), Reporter.NULL);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(Reporter.NULL);
    }
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    DataInputBuffer buf = new DataInputBuffer();
    final int NUM_SPLITS = 3;
    SequenceFileInputFormat.addInputPath(job, file);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job, NUM_SPLITS)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.getRecordReader(split, job, Reporter.NULL);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.next(iwritable, dwritable)) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used: Path(org.apache.hadoop.fs.Path) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
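
The core move in this test is serializing a typed Writable into raw bytes and decoding it back. A standalone sketch of that round trip (variable names are illustrative):

// Encode an IntWritable into a BytesWritable via DataOutputBuffer.
DataOutputBuffer out = new DataOutputBuffer();
new IntWritable(42).write(out);
BytesWritable raw = new BytesWritable();
raw.set(out.getData(), 0, out.getLength());
// Decode the bytes back into a typed IntWritable via DataInputBuffer.
DataInputBuffer in = new DataInputBuffer();
in.reset(raw.getBytes(), raw.getLength());
IntWritable decoded = new IntWritable();
decoded.readFields(in);
// decoded.get() is 42 again.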

Example 94 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestSequenceFileAsTextInputFormat, method testFormat.

@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class<?> readerClass = reader.getClass();
                assertEquals("reader class is SequenceFileAsTextRecordReader.", SequenceFileAsTextRecordReader.class, readerClass);
                Text value = reader.createValue();
                Text key = reader.createKey();
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        // LOG.info("@"+reader.getPos());
                        // }
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
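
SequenceFileAsTextRecordReader hands every key and value back as Text by calling toString() on the underlying Writables, which is why the test parses the key with Integer.parseInt. In essence (a simplified sketch, not the actual Hadoop implementation):

// The file stores IntWritable/LongWritable pairs...
IntWritable rawKey = new IntWritable(7);
LongWritable rawValue = new LongWritable(70);
// ...but the record reader exposes their string forms.
Text key = new Text(rawKey.toString());     // "7"
Text value = new Text(rawValue.toString()); // "70"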

Example 95 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestSequenceFileInputFormat, method testFormat.

@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>();
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        // LOG.info("@"+reader.getPos());
                        // }
                        assertFalse("Key in multiple partitions.", bits.get(key.get()));
                        bits.set(key.get());
                        count++;
                    }
                //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) BytesWritable(org.apache.hadoop.io.BytesWritable) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
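
For comparison, the same records could be read without an InputFormat by iterating a SequenceFile.Reader directly. A minimal sketch reusing fs, file, and conf from the test above (this constructor still works but is deprecated in newer Hadoop releases):

SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
try {
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    // next() fills the reusable key/value objects and returns false at EOF.
    while (reader.next(key, value)) {
        System.out.println(key.get() + " -> " + value.getLength() + " value bytes");
    }
} finally {
    reader.close();
}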

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 338 uses
Test (org.junit.Test): 120 uses
Text (org.apache.hadoop.io.Text): 115 uses
LongWritable (org.apache.hadoop.io.LongWritable): 79 uses
Path (org.apache.hadoop.fs.Path): 66 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 58 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 56 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 56 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 51 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 50 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 47 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 45 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 41 uses
ArrayList (java.util.ArrayList): 40 uses
Writable (org.apache.hadoop.io.Writable): 39 uses
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 37 uses
Configuration (org.apache.hadoop.conf.Configuration): 35 uses
IOException (java.io.IOException): 30 uses
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 29 uses
Random (java.util.Random): 28 uses