Use of org.apache.hadoop.io.DoubleWritable in project hadoop by apache:
class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.
@Test
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  FileOutputFormat.setOutputPath(job, outdir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    // Serialize each (IntWritable, DoubleWritable) pair to raw bytes before handing it to the writer.
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);
  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
        new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.nextKeyValue()) {
        // Re-seeding the Random above makes the expected sequence reproducible here.
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        iwritable = reader.getCurrentKey();
        dwritable = reader.getCurrentValue();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
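Both SequenceFileAsBinaryOutputFormat tests hinge on the same trick: each key and value is serialized to raw bytes with a DataOutputBuffer and handed to the writer as a BytesWritable. Below is a minimal standalone sketch of that round trip; the helper class and method names are illustrative, not part of the Hadoop test.

  import java.io.IOException;
  import org.apache.hadoop.io.BytesWritable;
  import org.apache.hadoop.io.DataInputBuffer;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.DoubleWritable;
  import org.apache.hadoop.io.Writable;

  public class WritableBytesRoundTrip {

    // Serialize any Writable into a BytesWritable, as the test does before writer.write().
    static BytesWritable toBytes(Writable w) throws IOException {
      DataOutputBuffer out = new DataOutputBuffer();
      w.write(out);
      BytesWritable bw = new BytesWritable();
      bw.set(out.getData(), 0, out.getLength()); // copy only the valid prefix of the buffer
      return bw;
    }

    // Deserialize the raw bytes back into a reusable Writable instance.
    static void fromBytes(BytesWritable bw, Writable reuse) throws IOException {
      DataInputBuffer in = new DataInputBuffer();
      in.reset(bw.getBytes(), bw.getLength());
      reuse.readFields(in);
    }

    public static void main(String[] args) throws IOException {
      DoubleWritable original = new DoubleWritable(12.34);
      DoubleWritable copy = new DoubleWritable();
      fromBytes(toBytes(original), copy);
      System.out.println(copy.get()); // prints 12.34
    }
  }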
Use of org.apache.hadoop.io.DoubleWritable in project hadoop by apache:
class TestSequenceFileAsBinaryOutputFormat, method testBinary.
@Test
public void testBinary() throws IOException {
  JobConf job = new JobConf();
  FileSystem fs = FileSystem.getLocal(job);
  Path dir = new Path(new Path(new Path(System.getProperty("test.build.data", ".")),
      FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
  Path file = new Path(dir, "testbinary.seq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  fs.delete(dir, true);
  if (!fs.mkdirs(dir)) {
    fail("Failed to create output directory");
  }
  job.set(JobContext.TASK_ATTEMPT_ID, attempt);
  FileOutputFormat.setOutputPath(job, dir.getParent().getParent());
  FileOutputFormat.setWorkOutputPath(job, dir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  RecordWriter<BytesWritable, BytesWritable> writer =
      new SequenceFileAsBinaryOutputFormat().getRecordWriter(fs, job, file.toString(), Reporter.NULL);
  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(Reporter.NULL);
  }
  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  DataInputBuffer buf = new DataInputBuffer();
  final int NUM_SPLITS = 3;
  SequenceFileInputFormat.addInputPath(job, file);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job, NUM_SPLITS)) {
    RecordReader<IntWritable, DoubleWritable> reader = iformat.getRecordReader(split, job, Reporter.NULL);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.next(iwritable, dwritable)) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
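The old-API variant verifies the output through SequenceFileInputFormat, but because SequenceFileAsBinaryOutputFormat records the configured key and value classes in the file header, the result could also be checked with a plain SequenceFile.Reader. A hedged sketch, reusing the variables from the test above (file, job, RECORDS):

  // Assumes the single output file written above at `file`.
  SequenceFile.Reader check = new SequenceFile.Reader(job, SequenceFile.Reader.file(file));
  try {
    IntWritable k = new IntWritable();
    DoubleWritable v = new DoubleWritable();
    int n = 0;
    while (check.next(k, v)) { // next() deserializes into the reusable key/value instances
      n++;
    }
    System.out.println(n + " records read back"); // expected: RECORDS
  } finally {
    check.close();
  }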
Use of org.apache.hadoop.io.DoubleWritable in project camel by apache:
class HdfsProducerTest, method testWriteDouble.
@Test
public void testWriteDouble() throws Exception {
  if (!canTest()) {
    return;
  }
  Double aDouble = 12.34D;
  template.sendBody("direct:write_double", aDouble);
  Configuration conf = new Configuration();
  Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
  SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file1));
  Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
  Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
  reader.next(key, value);
  Double rDouble = ((DoubleWritable) value).get();
  assertEquals(rDouble, aDouble);
  IOHelper.close(reader);
}
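The Camel test only exercises the read side of the route. For context, a comparable file could be produced directly with SequenceFile.createWriter; the path and the NullWritable key type below are assumptions for illustration, not taken from the camel-hdfs producer.

  Configuration conf = new Configuration();
  Path file = new Path("file:///tmp/test-camel-double"); // illustrative path
  SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(file),
      SequenceFile.Writer.keyClass(NullWritable.class),   // assumed key type
      SequenceFile.Writer.valueClass(DoubleWritable.class));
  try {
    writer.append(NullWritable.get(), new DoubleWritable(12.34D));
  } finally {
    writer.close();
  }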
Use of org.apache.hadoop.io.DoubleWritable in project nifi by apache:
class TestConvertAvroToORC, method test_onTrigger_nested_complex_record.
@Test
public void test_onTrigger_nested_complex_record() throws Exception {
  Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(1.0, 2.0));
      put("key2", Arrays.asList(3.0, 4.0));
    }
  };
  Map<String, String> arrayMap11 = new TreeMap<String, String>() {
    {
      put("key1", "v1");
      put("key2", "v2");
    }
  };
  Map<String, String> arrayMap12 = new TreeMap<String, String>() {
    {
      put("key3", "v3");
      put("key4", "v4");
    }
  };
  GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));
  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);
  // Put another record in
  Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(-1.0, -2.0));
      put("key2", Arrays.asList(-3.0, -4.0));
    }
  };
  Map<String, String> arrayMap21 = new TreeMap<String, String>() {
    {
      put("key1", "v-1");
      put("key2", "v-2");
    }
  };
  Map<String, String> arrayMap22 = new TreeMap<String, String>() {
    {
      put("key3", "v-3");
      put("key4", "v-4");
    }
  };
  record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();
  Map<String, String> attributes = new HashMap<String, String>() {
    {
      put(CoreAttributes.FILENAME.key(), "test");
    }
  };
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();
  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record "
      + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
      + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
  // check values
  Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
  assertTrue(myMapOfArray instanceof Map);
  Map map = (Map) myMapOfArray;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof List);
  assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);
  Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
  assertTrue(myArrayOfMap instanceof List);
  List list = (List) myArrayOfMap;
  Object el0 = list.get(0);
  assertNotNull(el0);
  assertTrue(el0 instanceof Map);
  assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
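The assertions above only inspect the first row; the record count is checked indirectly through the flow-file attribute. A hedged follow-on sketch, using the same Hive ORC RecordReader API already used in the test, that simply counts every row in the converted file (the test expects 2):

  RecordReader countRows = reader.rows();
  try {
    Object row = null;
    int n = 0;
    while (countRows.hasNext()) {
      row = countRows.next(row); // reuse the previous row object
      n++;
    }
    System.out.println(n + " rows"); // expected: 2
  } finally {
    countRows.close();
  }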
Use of org.apache.hadoop.io.DoubleWritable in project incubator-systemml by apache:
class MapReduceTool, method pickValueWeight.
public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p, boolean average) throws IOException {
  long[] counts = metadata.getNumItemsArray();
  long[] ranges = new long[counts.length];
  ranges[0] = counts[0];
  for (int i = 1; i < counts.length; i++)
    ranges[i] = ranges[i - 1] + counts[i];
  long total = ranges[ranges.length - 1];
  // do averaging only if it is asked for; and sum_wt is even
  average = average && (total % 2 == 0);
  int currentPart = 0;
  double cum_weight = 0;
  long pos = (long) Math.ceil(total * p);
  while (ranges[currentPart] < pos) {
    currentPart++;
    cum_weight += ranges[currentPart];
  }
  int offset;
  if (currentPart > 0)
    offset = (int) (pos - ranges[currentPart - 1] - 1);
  else
    offset = (int) pos - 1;
  Path path = new Path(dir);
  FileSystem fs = IOUtilFunctions.getFileSystem(path);
  FileStatus[] files = fs.listStatus(path);
  Path fileToRead = null;
  for (FileStatus file : files) {
    if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
      fileToRead = file.getPath();
      break;
    }
  }
  if (fileToRead == null)
    throw new RuntimeException("cannot read partition " + currentPart);
  int buffsz = 64 * 1024;
  DoubleWritable readKey = new DoubleWritable();
  IntWritable readValue = new IntWritable();
  FSDataInputStream currentStream = null;
  double ret = -1;
  try {
    currentStream = fs.open(fileToRead, buffsz);
    boolean contain0s = false;
    long numZeros = 0;
    if (currentPart == metadata.getPartitionOfZero()) {
      contain0s = true;
      numZeros = metadata.getNumberOfZero();
    }
    ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);
    // Skip forward through (value, count) pairs until the requested position is reached.
    int numRead = 0;
    while (numRead <= offset) {
      reader.readNextKeyValuePairs(readKey, readValue);
      numRead += readValue.get();
      cum_weight += readValue.get();
    }
    ret = readKey.get();
    if (average) {
      if (numRead <= offset + 1) {
        reader.readNextKeyValuePairs(readKey, readValue);
        cum_weight += readValue.get();
        ret = (ret + readKey.get()) / 2;
      }
    }
  } finally {
    IOUtilFunctions.closeSilently(currentStream);
  }
  return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
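The core of pickValueWeight is the cumulative-count arithmetic that maps the requested quantile position onto a partition index and an offset inside that partition. A minimal standalone sketch of that arithmetic with made-up counts (the values are illustrative only, not from SystemML):

  // Three reducer partitions holding 4, 3 and 5 sorted items respectively.
  long[] counts = { 4, 3, 5 };
  long[] ranges = new long[counts.length];
  ranges[0] = counts[0];
  for (int i = 1; i < counts.length; i++)
    ranges[i] = ranges[i - 1] + counts[i];        // cumulative counts: 4, 7, 12
  long total = ranges[ranges.length - 1];         // 12 items overall
  double p = 0.5;
  long pos = (long) Math.ceil(total * p);         // 6 -> the 6th smallest item overall
  int part = 0;
  while (ranges[part] < pos)
    part++;                                       // partition 1 covers items 5..7
  long offset = (part > 0) ? pos - ranges[part - 1] - 1 : pos - 1; // 1, i.e. second item in that partition
  System.out.println("partition " + part + ", offset " + offset);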