Search in sources :

Example 11 with ByteWritable

use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.

The method configure of the class CSVReblockMapper.

/**
 * Initializes this mapper from the job configuration.
 *
 * <p>After delegating to the superclass, this method:
 * <ul>
 *   <li>flags {@code headerFile} when the current map input file equals the
 *       {@code CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT} entry of the first
 *       representative matrix;</li>
 *   <li>scans the sequence file named by {@code CSVReblockMR.ROWID_FILE_NAME}
 *       and copies into {@code offsetMap} every (fileOffset, count) pair whose
 *       key equals that matrix index and whose filename equals the current
 *       input file;</li>
 *   <li>takes the delimiter and header flag from the first CSV reblock
 *       instruction;</li>
 *   <li>resets the reusable row buffer to one row with as many columns as the
 *       largest {@code bclen} over all CSV reblock instructions.</li>
 * </ul>
 *
 * @param job the job configuration to read settings from
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         resolving paths or reading the row-id file
 */
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);

    // index of the first representative matrix; selects header file and offset entries
    final byte matrixIndex = representativeMatrixes.get(0);

    try {
        // fully qualified name of the file this mapper is reading
        Path inputPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(inputPath, job);
        inputPath = inputPath.makeQualified(fs);
        String filename = inputPath.toString();

        // is this input the smallest file registered for the matrix?
        String[] smallestFiles = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(smallestFiles[matrixIndex]).makeQualified(fs);
        if (filename.equals(headerPath.toString())) {
            headerFile = true;
        }

        // load the offset mapping for this matrix and input file
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path rowIdPath = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, rowIdPath, job);
            while (reader.next(key, value)) {
                // skip entries belonging to other matrices or other input files
                if (key.get() != matrixIndex || !filename.equals(value.filename)) {
                    continue;
                }
                offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // delimiter and header handling come from the first instruction
    CSVReblockInstruction firstIns = csv_reblock_instructions.get(0).get(0);
    _delim = firstIns.delim;
    ignoreFirstLine = firstIns.hasHeader;

    // the row buffer must fit the widest block of any instruction
    idxRow = new IndexedBlockRow();
    int maxBclen = 0;
    for (ArrayList<CSVReblockInstruction> group : csv_reblock_instructions) {
        for (CSVReblockInstruction candidate : group) {
            maxBclen = Math.max(maxBclen, candidate.bclen);
        }
    }
    //always dense since common csv usecase
    idxRow.getRow().data.reset(1, maxBclen, false);
}
Also used : Path(org.apache.hadoop.fs.Path) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) IOException(java.io.IOException) OffsetCount(org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) ByteWritable(org.apache.hadoop.io.ByteWritable)

Example 12 with ByteWritable

use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.

The method generateOffsetsFile of the class GTFMTDReducer.

/**
 * Writes the offsets file and returns the total count over all entries.
 *
 * <p>The list is sorted first. Each entry's {@code count} is then replaced by
 * the sum of the original counts of all entries before it in sorted order, and
 * the entry is appended to a sequence file at {@code <offsetFile>/part-00000}
 * under a {@code ByteWritable} key of 0. The list is emptied before returning.
 *
 * @param list the entries to write; sorted, modified in place and cleared
 * @return the sum of the original counts of all entries
 * @throws IllegalArgumentException declared by the original signature
 * @throws IOException if the sequence file cannot be created or written
 */
@SuppressWarnings({ "unchecked", "deprecation" })
private long generateOffsetsFile(ArrayList<OffsetCount> list) throws IllegalArgumentException, IOException {
    Collections.sort(list);

    long runningOffset = 0;
    SequenceFile.Writer out = null;
    try {
        FileSystem fs = FileSystem.get(_rJob);
        Path offsetsPart = new Path(_agents.getOffsetFile() + "/part-00000");
        out = new SequenceFile.Writer(fs, _rJob, offsetsPart, ByteWritable.class, OffsetCount.class);

        for (OffsetCount entry : list) {
            // remember the original count before overwriting it with the offset
            final long entryCount = entry.count;
            entry.count = runningOffset;
            out.append(new ByteWritable((byte) 0), entry);
            runningOffset += entryCount;
        }
    } finally {
        IOUtilFunctions.closeSilently(out);
    }

    list.clear();
    return runningOffset;
}
Also used : Path(org.apache.hadoop.fs.Path) OffsetCount(org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteWritable(org.apache.hadoop.io.ByteWritable)

Aggregations

ByteWritable (org.apache.hadoop.io.ByteWritable)12 Path (org.apache.hadoop.fs.Path)7 SequenceFile (org.apache.hadoop.io.SequenceFile)7 DoubleWritable (org.apache.hadoop.io.DoubleWritable)6 FloatWritable (org.apache.hadoop.io.FloatWritable)6 IntWritable (org.apache.hadoop.io.IntWritable)6 LongWritable (org.apache.hadoop.io.LongWritable)6 BooleanWritable (org.apache.hadoop.io.BooleanWritable)5 Test (org.junit.Test)5 Configuration (org.apache.hadoop.conf.Configuration)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 NullWritable (org.apache.hadoop.io.NullWritable)4 Text (org.apache.hadoop.io.Text)4 Writable (org.apache.hadoop.io.Writable)4 OffsetCount (org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount)4 BytesWritable (org.apache.hadoop.io.BytesWritable)3 BasicDBObject (com.mongodb.BasicDBObject)2 File (java.io.File)2 IOException (java.io.IOException)2 RouteBuilder (org.apache.camel.builder.RouteBuilder)2