
Example 1 with DoubleWritable

Use of org.apache.hadoop.io.DoubleWritable in project hadoop by apache.

The class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) BytesWritable(org.apache.hadoop.io.BytesWritable) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Random(java.util.Random) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
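
The essential move in this test is that SequenceFileAsBinaryOutputFormat stores whatever raw bytes it is handed, while the typed SequenceFileInputFormat on the read side decodes those bytes back into IntWritable and DoubleWritable. A minimal standalone sketch of that byte-level round trip, using only the Hadoop io classes (illustrative code, not part of the Hadoop test; the class name is made up):

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Typed writable -> raw bytes, as done just before writer.write(bkey, bval).
        DataOutputBuffer outbuf = new DataOutputBuffer();
        new DoubleWritable(3.14).write(outbuf);
        BytesWritable bval = new BytesWritable();
        bval.set(outbuf.getData(), 0, outbuf.getLength());

        // Raw bytes -> typed writable, which is what the typed reader does later.
        DataInputBuffer inbuf = new DataInputBuffer();
        inbuf.reset(bval.getBytes(), 0, bval.getLength());
        DoubleWritable decoded = new DoubleWritable();
        decoded.readFields(inbuf);
        System.out.println(decoded.get()); // prints 3.14
    }
}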

Example 2 with DoubleWritable

Use of org.apache.hadoop.io.DoubleWritable in project hadoop by apache.

The class TestSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException {
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.getLocal(job);
    Path dir = new Path(new Path(new Path(System.getProperty("test.build.data", ".")), FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    fs.delete(dir, true);
    if (!fs.mkdirs(dir)) {
        fail("Failed to create output directory");
    }
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    FileOutputFormat.setOutputPath(job, dir.getParent().getParent());
    FileOutputFormat.setWorkOutputPath(job, dir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    RecordWriter<BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat().getRecordWriter(fs, job, file.toString(), Reporter.NULL);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(Reporter.NULL);
    }
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    DataInputBuffer buf = new DataInputBuffer();
    final int NUM_SPLITS = 3;
    SequenceFileInputFormat.addInputPath(job, file);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job, NUM_SPLITS)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.getRecordReader(split, job, Reporter.NULL);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.next(iwritable, dwritable)) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used : Path(org.apache.hadoop.fs.Path) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
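
A side note on the DataInputBuffer declared above: the loop shown never touches it, because the typed SequenceFileInputFormat already hands back decoded IntWritable/DoubleWritable objects. If the file were instead read through the old-API SequenceFileAsBinaryInputFormat, that buffer is exactly what you would use to decode the raw key/value bytes by hand. A hedged sketch of such a read loop, written as if it ran inside the same test method (variable names are illustrative, not part of the original test):

// Sketch only: read back raw BytesWritable pairs and decode them manually.
SequenceFileAsBinaryInputFormat binFormat = new SequenceFileAsBinaryInputFormat();
for (InputSplit split : binFormat.getSplits(job, NUM_SPLITS)) {
    RecordReader<BytesWritable, BytesWritable> raw =
        binFormat.getRecordReader(split, job, Reporter.NULL);
    BytesWritable rkey = raw.createKey();
    BytesWritable rval = raw.createValue();
    DataInputBuffer decodeBuf = new DataInputBuffer();
    IntWritable key = new IntWritable();
    DoubleWritable val = new DoubleWritable();
    try {
        while (raw.next(rkey, rval)) {
            decodeBuf.reset(rkey.getBytes(), rkey.getLength());
            key.readFields(decodeBuf);      // raw key bytes -> int
            decodeBuf.reset(rval.getBytes(), rval.getLength());
            val.readFields(decodeBuf);      // raw value bytes -> double
        }
    } finally {
        raw.close();
    }
}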

Example 3 with DoubleWritable

Use of org.apache.hadoop.io.DoubleWritable in project camel by apache.

The class HdfsProducerTest, method testWriteDouble.

@Test
public void testWriteDouble() throws Exception {
    if (!canTest()) {
        return;
    }
    Double aDouble = 12.34D;
    template.sendBody("direct:write_double", aDouble);
    Configuration conf = new Configuration();
    Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file1));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    reader.next(key, value);
    Double rDouble = ((DoubleWritable) value).get();
    assertEquals(rDouble, aDouble);
    IOHelper.close(reader);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) Test(org.junit.Test)
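
For context, the file read by this test is written beforehand by the camel-hdfs producer on the direct:write_double route. A rough sketch of an equivalent write using the plain SequenceFile API is shown below; the NullWritable key class is an assumption made for illustration, and the key type actually emitted by camel-hdfs may differ:

// Illustrative write side (not from the Camel test). Assumes a NullWritable key.
Configuration conf = new Configuration();
Path file = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(NullWritable.class),
        SequenceFile.Writer.valueClass(DoubleWritable.class));
try {
    writer.append(NullWritable.get(), new DoubleWritable(12.34D));
} finally {
    writer.close();
}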

Example 4 with DoubleWritable

Use of org.apache.hadoop.io.DoubleWritable in project nifi by apache.

The class TestConvertAvroToORC, method test_onTrigger_nested_complex_record.

@Test
public void test_onTrigger_nested_complex_record() throws Exception {
    Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {

        {
            put("key1", Arrays.asList(1.0, 2.0));
            put("key2", Arrays.asList(3.0, 4.0));
        }
    };
    Map<String, String> arrayMap11 = new TreeMap<String, String>() {

        {
            put("key1", "v1");
            put("key2", "v2");
        }
    };
    Map<String, String> arrayMap12 = new TreeMap<String, String>() {

        {
            put("key3", "v3");
            put("key4", "v4");
        }
    };
    GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));
    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    // Put another record in
    Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {

        {
            put("key1", Arrays.asList(-1.0, -2.0));
            put("key2", Arrays.asList(-3.0, -4.0));
        }
    };
    Map<String, String> arrayMap21 = new TreeMap<String, String>() {

        {
            put("key1", "v-1");
            put("key2", "v-2");
        }
    };
    Map<String, String> arrayMap22 = new TreeMap<String, String>() {

        {
            put("key3", "v-3");
            put("key4", "v-4");
        }
    };
    record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();
    Map<String, String> attributes = new HashMap<String, String>() {

        {
            put(CoreAttributes.FILENAME.key(), "test");
        }
    };
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
    // check values
    Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
    assertTrue(myMapOfArray instanceof Map);
    Map map = (Map) myMapOfArray;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof List);
    assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);
    Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
    assertTrue(myArrayOfMap instanceof List);
    List list = (List) myArrayOfMap;
    Object el0 = list.get(0);
    assertNotNull(el0);
    assertTrue(el0 instanceof Map);
    assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) RecordReader(org.apache.hadoop.hive.ql.io.orc.RecordReader) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) DoubleWritable(org.apache.hadoop.io.DoubleWritable) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) Path(org.apache.hadoop.fs.Path) DataFileWriter(org.apache.avro.file.DataFileWriter) Text(org.apache.hadoop.io.Text) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TreeMap(java.util.TreeMap) GenericData(org.apache.avro.generic.GenericData) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MockFlowFile(org.apache.nifi.util.MockFlowFile) FileOutputStream(java.io.FileOutputStream) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
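
The List<DoubleWritable> assertion near the end works because DoubleWritable implements value-based equals, hashCode and compareTo on the wrapped primitive, so the writables returned by the ORC object inspector compare equal to freshly constructed ones. A tiny illustrative check (not part of the NiFi test):

DoubleWritable a = new DoubleWritable(1.0);
DoubleWritable b = new DoubleWritable(1.0);
assertTrue(a.equals(b));                          // compares the wrapped values
assertEquals(0, a.compareTo(b));
assertEquals(Arrays.asList(a), Arrays.asList(b));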

Example 5 with DoubleWritable

Use of org.apache.hadoop.io.DoubleWritable in project incubator-systemml by apache.

The class MapReduceTool, method pickValueWeight.

public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p, boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i];
    long total = ranges[ranges.length - 1];
    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);
    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;
    Path path = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files) if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
        fileToRead = file.getPath();
        break;
    }
    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);
    int buffsz = 64 * 1024;
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();
    FSDataInputStream currentStream = null;
    double ret = -1;
    try {
        currentStream = fs.open(fileToRead, buffsz);
        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);
        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }
        ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(currentStream);
    }
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ReadWithZeros(org.apache.sysml.runtime.matrix.sort.ReadWithZeros) IntWritable(org.apache.hadoop.io.IntWritable)
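
To make the partition and offset arithmetic concrete, here is a small worked sketch with hypothetical reducer counts (illustrative only, not SystemML code):

// Hypothetical setup: three reducer partitions hold 4, 3 and 5 sorted values;
// ranges[] is the running total, built the same way as in the method above.
long[] ranges = { 4, 7, 12 };
long total = ranges[ranges.length - 1];                   // 12 values overall
double p = 0.5;                                           // ask for the median
long pos = (long) Math.ceil(total * p);                   // 6 -> 6th smallest value
int currentPart = 0;
while (ranges[currentPart] < pos) currentPart++;          // lands on partition 1
int offset = (int) (pos - ranges[currentPart - 1] - 1);   // index 1 inside it
// pickValueWeight then opens the part file whose name ends in "1" and skips
// key/value pairs until 'offset' items (counted by weight) have been read.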

Aggregations

DoubleWritable (org.apache.hadoop.io.DoubleWritable): 38 usages
IntWritable (org.apache.hadoop.io.IntWritable): 20 usages
Test (org.junit.Test): 19 usages
Path (org.apache.hadoop.fs.Path): 12 usages
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 12 usages
LongWritable (org.apache.hadoop.io.LongWritable): 12 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 11 usages
FloatWritable (org.apache.hadoop.io.FloatWritable): 11 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
Writable (org.apache.hadoop.io.Writable): 10 usages
Configuration (org.apache.hadoop.conf.Configuration): 9 usages
Text (org.apache.hadoop.io.Text): 8 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 7 usages
ByteWritable (org.apache.hadoop.io.ByteWritable): 7 usages
NullWritable (org.apache.hadoop.io.NullWritable): 6 usages
ArrayList (java.util.ArrayList): 5 usages
Map (java.util.Map): 5 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 5 usages
IOException (java.io.IOException): 4 usages
HashMap (java.util.HashMap): 4 usages