Example 1 with ByteWritable

Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.

The class TfUtils, method getPartFileID.

/**
 * Function to generate custom file names (transform-part-.....) for
 * mappers' output for the ApplyTfCSV job. The idea is to find the index
 * of (thisfile, fileoffset) in the list of all offsets from the
 * counters/offsets file, which was generated by either the GenTfMtdMR
 * or the AssignRowIDMR job.
 *
 * @param job job configuration
 * @param offset file offset
 * @return part file id (i.e., 00001, 00002, etc.)
 * @throws IOException if an IOException occurs
 */
public String getPartFileID(JobConf job, long offset) throws IOException {
    Reader reader = null;
    int id = 0;
    try {
        reader = initOffsetsReader(job);
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        String thisFile = TfUtils.getPartFileName(job);
        // scan the (file, offset) entries in order; the index of the matching
        // entry is this task's part-file id
        while (reader.next(key, value)) {
            if (thisFile.equals(value.filename) && value.fileOffset == offset)
                break;
            id++;
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    // left-pad the id with zeros to a fixed width of 5 characters
    String sid = Integer.toString(id);
    char[] carr = new char[5 - sid.length()];
    Arrays.fill(carr, '0');
    String ret = (new String(carr)).concat(sid);
    return ret;
}
Also used : OffsetCount(org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount) MatrixReader(org.apache.sysml.runtime.io.MatrixReader) Reader(org.apache.hadoop.io.SequenceFile.Reader) ByteWritable(org.apache.hadoop.io.ByteWritable)
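The zero-padding at the end of getPartFileID can also be expressed with String.format. A minimal standalone sketch (hypothetical helper, not part of TfUtils) that matches the char[]-fill logic above for ids below 100000:

// Hypothetical helper mirroring the padding above. For id >= 100000 the
// char[] approach would fail with a negative array size, while
// String.format simply stops padding.
public class PartIdFormat {

    static String toPartFileID(int id) {
        // "%05d" left-pads with zeros to a width of 5, e.g. 7 -> "00007"
        return String.format("%05d", id);
    }

    public static void main(String[] args) {
        System.out.println(toPartFileID(1));   // 00001
        System.out.println(toPartFileID(42));  // 00042
    }
}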

Example 2 with ByteWritable

Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.

The class ApplyTfBBMapper, method configure.

@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        _partFileWithHeader = TfUtils.isPartFileWithHeader(job);
        tfmapper = new TfUtils(job);
        tfmapper.loadTfMetadata(job, true);
        // Load relevant information for CSV Reblock
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        Path path = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        String thisfile = path.makeQualified(fs).toString();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, p, job);
            while (reader.next(key, value)) {
                // "key" needn't be checked since the offset file has information about a single CSV input (the raw data file)
                if (thisfile.equals(value.filename))
                    offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
        idxRow = new CSVReblockMapper.IndexedBlockRow();
        int maxBclen = 0;
        for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions) {
            for (CSVReblockInstruction in : insv) {
                if (maxBclen < in.bclen)
                    maxBclen = in.bclen;
            }
        }
        // always dense, since dense is the common CSV use case
        idxRow.getRow().data.reset(1, maxBclen, false);
    } catch (IOException | JSONException e) {
        throw new RuntimeException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CSVReblockMapper(org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper) IndexedBlockRow(org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.IndexedBlockRow) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) JSONException(org.apache.wink.json4j.JSONException) IOException(java.io.IOException) OffsetCount(org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) ByteWritable(org.apache.hadoop.io.ByteWritable)
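For context, the offsets file scanned above is a SequenceFile of (ByteWritable, OffsetCount) pairs. A minimal sketch of writing such a file for testing, assuming the classic SequenceFile.createWriter API, the public OffsetCount fields read by the mapper above, and that IOUtilFunctions.closeSilently accepts the writer as it does the reader:

// Hypothetical test helper: writes one (ByteWritable, OffsetCount) entry
// in the shape that configure() scans above.
static void writeOffsetsFile(JobConf job, Path p, String csvFile) throws IOException {
    FileSystem fs = IOUtilFunctions.getFileSystem(p, job);
    SequenceFile.Writer w = SequenceFile.createWriter(fs, job, p, ByteWritable.class, OffsetCount.class);
    try {
        ByteWritable key = new ByteWritable((byte) 0);
        OffsetCount value = new OffsetCount();
        // assumed public fields, as read by the mapper above
        value.filename = csvFile;
        value.fileOffset = 0L;
        value.count = 1000L;
        w.append(key, value);
    } finally {
        IOUtilFunctions.closeSilently(w);
    }
}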

Example 3 with ByteWritable

Use of org.apache.hadoop.io.ByteWritable in project presto by prestodb.

The class OrcTester, method decodeRecordReaderValue.

private static Object decodeRecordReaderValue(Type type, Object actualValue) {
    if (actualValue instanceof OrcLazyObject) {
        try {
            actualValue = ((OrcLazyObject) actualValue).materialize();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
    if (actualValue instanceof BooleanWritable) {
        actualValue = ((BooleanWritable) actualValue).get();
    } else if (actualValue instanceof ByteWritable) {
        actualValue = ((ByteWritable) actualValue).get();
    } else if (actualValue instanceof BytesWritable) {
        actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes());
    } else if (actualValue instanceof DateWritable) {
        actualValue = new SqlDate(((DateWritable) actualValue).getDays());
    } else if (actualValue instanceof DoubleWritable) {
        actualValue = ((DoubleWritable) actualValue).get();
    } else if (actualValue instanceof FloatWritable) {
        actualValue = ((FloatWritable) actualValue).get();
    } else if (actualValue instanceof IntWritable) {
        actualValue = ((IntWritable) actualValue).get();
    } else if (actualValue instanceof HiveCharWritable) {
        actualValue = ((HiveCharWritable) actualValue).getPaddedValue().toString();
    } else if (actualValue instanceof LongWritable) {
        actualValue = ((LongWritable) actualValue).get();
    } else if (actualValue instanceof ShortWritable) {
        actualValue = ((ShortWritable) actualValue).get();
    } else if (actualValue instanceof HiveDecimalWritable) {
        DecimalType decimalType = (DecimalType) type;
        HiveDecimalWritable writable = (HiveDecimalWritable) actualValue;
        // writable messes with the scale so rescale the values to the Presto type
        BigInteger rescaledValue = rescale(writable.getHiveDecimal().unscaledValue(), writable.getScale(), decimalType.getScale());
        actualValue = new SqlDecimal(rescaledValue, decimalType.getPrecision(), decimalType.getScale());
    } else if (actualValue instanceof Text) {
        actualValue = actualValue.toString();
    } else if (actualValue instanceof TimestampWritable) {
        TimestampWritable timestamp = (TimestampWritable) actualValue;
        actualValue = sqlTimestampOf((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), SESSION);
    } else if (actualValue instanceof OrcStruct) {
        List<Object> fields = new ArrayList<>();
        OrcStruct structObject = (OrcStruct) actualValue;
        for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) {
            fields.add(OrcUtil.getFieldValue(structObject, fieldId));
        }
        actualValue = decodeRecordReaderStruct(type, fields);
    } else if (actualValue instanceof com.facebook.hive.orc.OrcStruct) {
        List<Object> fields = new ArrayList<>();
        com.facebook.hive.orc.OrcStruct structObject = (com.facebook.hive.orc.OrcStruct) actualValue;
        for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) {
            fields.add(structObject.getFieldValue(fieldId));
        }
        actualValue = decodeRecordReaderStruct(type, fields);
    } else if (actualValue instanceof List) {
        actualValue = decodeRecordReaderList(type, ((List<?>) actualValue));
    } else if (actualValue instanceof Map) {
        actualValue = decodeRecordReaderMap(type, (Map<?, ?>) actualValue);
    }
    return actualValue;
}
Also used : SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) UncheckedIOException(java.io.UncheckedIOException) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) OrcLazyObject(com.facebook.hive.orc.lazy.OrcLazyObject) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) Arrays.asList(java.util.Arrays.asList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) SqlDate(com.facebook.presto.common.type.SqlDate) DecimalType(com.facebook.presto.common.type.DecimalType) BigInteger(java.math.BigInteger) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
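The branches that unwrap plain Hadoop primitive Writables can be exercised in isolation. A minimal standalone sketch (not Presto code) of the same instanceof-and-get pattern:

import org.apache.hadoop.io.*;

public class WritableUnwrap {

    // Mirrors the unwrap chain above for the org.apache.hadoop.io primitives.
    static Object unwrap(Object v) {
        if (v instanceof BooleanWritable)
            return ((BooleanWritable) v).get();
        if (v instanceof ByteWritable)
            return ((ByteWritable) v).get();
        if (v instanceof IntWritable)
            return ((IntWritable) v).get();
        if (v instanceof LongWritable)
            return ((LongWritable) v).get();
        if (v instanceof FloatWritable)
            return ((FloatWritable) v).get();
        if (v instanceof DoubleWritable)
            return ((DoubleWritable) v).get();
        if (v instanceof Text)
            return v.toString();
        if (v instanceof BytesWritable)
            return ((BytesWritable) v).copyBytes();
        // pass anything unrecognized through unchanged
        return v;
    }

    public static void main(String[] args) {
        System.out.println(unwrap(new ByteWritable((byte) 3))); // 3
        System.out.println(unwrap(new Text("abc")));            // abc
    }
}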

Example 4 with ByteWritable

Use of org.apache.hadoop.io.ByteWritable in project camel by apache.

The class HdfsConsumerTest, method testReadByte (camel-hdfs component).

@Test
public void testReadByte() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-byte").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    // write a single (NullWritable, ByteWritable) record for the route to consume
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, ByteWritable.class);
    NullWritable keyWritable = NullWritable.get();
    ByteWritable valueWritable = new ByteWritable();
    byte value = 3;
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.message(0).body(byte.class).isEqualTo(3);
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayFile(org.apache.hadoop.io.ArrayFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) Test(org.junit.Test)
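To check the fixture outside of Camel, the file written above can be read back directly with the SequenceFile reader API; a minimal sketch:

// Hypothetical verification helper: reads back the single record written above.
static void readBack(Configuration conf, Path file) throws IOException {
    FileSystem fs = FileSystem.get(file.toUri(), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try {
        NullWritable key = NullWritable.get();
        ByteWritable value = new ByteWritable();
        while (reader.next(key, value)) {
            // prints 3 for the fixture above
            System.out.println(value.get());
        }
    } finally {
        reader.close();
    }
}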

Example 5 with ByteWritable

Use of org.apache.hadoop.io.ByteWritable in project camel by apache.

The class HdfsConsumerTest, method testReadByte (camel-hdfs2 component).

@Test
public void testReadByte() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-byte").getAbsolutePath());
    Configuration conf = new Configuration();
    // write a single (NullWritable, ByteWritable) record for the route to consume
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, ByteWritable.class);
    NullWritable keyWritable = NullWritable.get();
    ByteWritable valueWritable = new ByteWritable();
    byte value = 3;
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.message(0).body(byte.class).isEqualTo(3);
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs2:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) ArrayFile(org.apache.hadoop.io.ArrayFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) Writer(org.apache.hadoop.io.SequenceFile.Writer) Test(org.junit.Test)

Aggregations

ByteWritable (org.apache.hadoop.io.ByteWritable): 15
Path (org.apache.hadoop.fs.Path): 8
SequenceFile (org.apache.hadoop.io.SequenceFile): 8
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 7
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 7
IntWritable (org.apache.hadoop.io.IntWritable): 7
LongWritable (org.apache.hadoop.io.LongWritable): 7
FloatWritable (org.apache.hadoop.io.FloatWritable): 6
Test (org.junit.Test): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 5
OffsetCount (org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount): 5
IOException (java.io.IOException): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
BytesWritable (org.apache.hadoop.io.BytesWritable): 4
NullWritable (org.apache.hadoop.io.NullWritable): 4
Text (org.apache.hadoop.io.Text): 4
Writable (org.apache.hadoop.io.Writable): 4
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 3
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 3
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 3