Search in sources :

Example 1 with Converter

use of org.apache.sysml.runtime.matrix.data.Converter in project systemml by apache.

In class MRJobConfiguration, method getMultipleConvertedOutputs:

/**
 * Builds the collector for all result outputs of an MR job: one output
 * converter and one matrix-characteristics entry per result index.
 *
 * @param job the job configuration holding result indexes and output metadata
 * @return a {@code CollectMultipleConvertedOutputs} wrapping the per-output
 *         converters, statistics, and a {@code MultipleOutputs} for the job
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    // note: a tag->positions map was previously built here but never read; removed as dead code
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, output);
    }
    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) MultipleOutputs(org.apache.hadoop.mapred.lib.MultipleOutputs) IdenticalConverter(org.apache.sysml.runtime.matrix.data.IdenticalConverter) TextCellToRowBlockConverter(org.apache.sysml.runtime.matrix.data.TextCellToRowBlockConverter) BinaryCellToRowBlockConverter(org.apache.sysml.runtime.matrix.data.BinaryCellToRowBlockConverter) WeightedCellToSortInputConverter(org.apache.sysml.runtime.matrix.data.WeightedCellToSortInputConverter) TextToBinaryCellConverter(org.apache.sysml.runtime.matrix.data.TextToBinaryCellConverter) BinaryBlockToRowBlockConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToRowBlockConverter) Converter(org.apache.sysml.runtime.matrix.data.Converter) AddDummyWeightConverter(org.apache.sysml.runtime.matrix.data.AddDummyWeightConverter) BinaryBlockToBinaryCellConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToBinaryCellConverter) BinaryBlockToTextCellConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToTextCellConverter) BinaryCellToTextConverter(org.apache.sysml.runtime.matrix.data.BinaryCellToTextConverter)

Example 2 with Converter

use of org.apache.sysml.runtime.matrix.data.Converter in project incubator-systemml by apache.

In class MRJobConfiguration, method getMultipleConvertedOutputs:

/**
 * Builds the collector for all result outputs of an MR job: one output
 * converter and one matrix-characteristics entry per result index.
 *
 * @param job the job configuration holding result indexes and output metadata
 * @return a {@code CollectMultipleConvertedOutputs} wrapping the per-output
 *         converters, statistics, and a {@code MultipleOutputs} for the job
 */
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job) {
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    // note: a tag->positions map was previously built here but never read; removed as dead code
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, output);
    }
    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) MultipleOutputs(org.apache.hadoop.mapred.lib.MultipleOutputs) IdenticalConverter(org.apache.sysml.runtime.matrix.data.IdenticalConverter) TextCellToRowBlockConverter(org.apache.sysml.runtime.matrix.data.TextCellToRowBlockConverter) BinaryCellToRowBlockConverter(org.apache.sysml.runtime.matrix.data.BinaryCellToRowBlockConverter) WeightedCellToSortInputConverter(org.apache.sysml.runtime.matrix.data.WeightedCellToSortInputConverter) TextToBinaryCellConverter(org.apache.sysml.runtime.matrix.data.TextToBinaryCellConverter) BinaryBlockToRowBlockConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToRowBlockConverter) Converter(org.apache.sysml.runtime.matrix.data.Converter) AddDummyWeightConverter(org.apache.sysml.runtime.matrix.data.AddDummyWeightConverter) BinaryBlockToBinaryCellConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToBinaryCellConverter) BinaryBlockToTextCellConverter(org.apache.sysml.runtime.matrix.data.BinaryBlockToTextCellConverter) BinaryCellToTextConverter(org.apache.sysml.runtime.matrix.data.BinaryCellToTextConverter)

Example 3 with Converter

use of org.apache.sysml.runtime.matrix.data.Converter in project incubator-systemml by apache.

In class SamplingSortMRInputFormat, method writePartitionFile:

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return index of the first partition boundary with value {@code >= 0},
 *         or the last partition if all sampled boundaries are negative
 * @throws IOException if something goes wrong
 * @throws InstantiationException if InstantiationException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class);
    // block sizes of input tag 0, needed to configure the input converter
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);
    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // the converter depends only on conf and tag 0, so create and configure it
    // once (previously a new converter was instantiated for every record)
    Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
    inputConverter.setBlockSize(brlen, bclen);
    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; i++) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        try {
            WritableComparable key = (WritableComparable) reader.createKey();
            Writable value = (Writable) reader.createValue();
            while (reader.next(key, value) && count < recordsPerSample) {
                inputConverter.convert(key, value);
                while (inputConverter.hasNext()) {
                    Pair pair = inputConverter.next();
                    if (pair.getKey() instanceof DoubleWritable) {
                        sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                    } else if (pair.getValue() instanceof MatrixCell) {
                        sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                    } else
                        throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                    count++;
                }
                key = (WritableComparable) reader.createKey();
                value = (Writable) reader.createValue();
            }
        } finally {
            // fix: the record reader was previously never closed (resource leak)
            reader.close();
        }
        totalcount += count;
    }
    if (totalcount == 0) // empty input files
        sampler.addValue(new DoubleWritable(0));
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = null;
    int index0 = -1;
    try {
        writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
        NullWritable nullValue = NullWritable.get();
        int i = 0;
        boolean lessthan0 = true;
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
        if (lessthan0)
            index0 = partitions - 1;
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    return index0;
}
Also used : SequenceFileRecordReader(org.apache.hadoop.mapred.SequenceFileRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Converter(org.apache.sysml.runtime.matrix.data.Converter) InputSplit(org.apache.hadoop.mapred.InputSplit) Pair(org.apache.sysml.runtime.matrix.data.Pair)

Example 4 with Converter

use of org.apache.sysml.runtime.matrix.data.Converter in project systemml by apache.

In class SamplingSortMRInputFormat, method writePartitionFile:

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return index of the first partition boundary with value {@code >= 0},
 *         or the last partition if all sampled boundaries are negative
 * @throws IOException if something goes wrong
 * @throws InstantiationException if InstantiationException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class);
    // block sizes of input tag 0, needed to configure the input converter
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);
    // indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // the converter depends only on conf and tag 0, so create and configure it
    // once (previously a new converter was instantiated for every record)
    Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
    inputConverter.setBlockSize(brlen, bclen);
    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; i++) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        try {
            WritableComparable key = (WritableComparable) reader.createKey();
            Writable value = (Writable) reader.createValue();
            while (reader.next(key, value) && count < recordsPerSample) {
                inputConverter.convert(key, value);
                while (inputConverter.hasNext()) {
                    Pair pair = inputConverter.next();
                    if (pair.getKey() instanceof DoubleWritable) {
                        sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                    } else if (pair.getValue() instanceof MatrixCell) {
                        sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                    } else
                        throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                    count++;
                }
                key = (WritableComparable) reader.createKey();
                value = (Writable) reader.createValue();
            }
        } finally {
            // fix: the record reader was previously never closed (resource leak)
            reader.close();
        }
        totalcount += count;
    }
    if (totalcount == 0) // empty input files
        sampler.addValue(new DoubleWritable(0));
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    // note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = null;
    int index0 = -1;
    try {
        writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
        NullWritable nullValue = NullWritable.get();
        int i = 0;
        boolean lessthan0 = true;
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
        if (lessthan0)
            index0 = partitions - 1;
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    return index0;
}
Also used : SequenceFileRecordReader(org.apache.hadoop.mapred.SequenceFileRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) SequenceFile(org.apache.hadoop.io.SequenceFile) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Converter(org.apache.sysml.runtime.matrix.data.Converter) InputSplit(org.apache.hadoop.mapred.InputSplit) Pair(org.apache.sysml.runtime.matrix.data.Pair)

Aggregations

Converter (org.apache.sysml.runtime.matrix.data.Converter)4 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 DoubleWritable (org.apache.hadoop.io.DoubleWritable)2 NullWritable (org.apache.hadoop.io.NullWritable)2 SequenceFile (org.apache.hadoop.io.SequenceFile)2 Writable (org.apache.hadoop.io.Writable)2 WritableComparable (org.apache.hadoop.io.WritableComparable)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 SequenceFileRecordReader (org.apache.hadoop.mapred.SequenceFileRecordReader)2 MultipleOutputs (org.apache.hadoop.mapred.lib.MultipleOutputs)2 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)2 AddDummyWeightConverter (org.apache.sysml.runtime.matrix.data.AddDummyWeightConverter)2 BinaryBlockToBinaryCellConverter (org.apache.sysml.runtime.matrix.data.BinaryBlockToBinaryCellConverter)2 BinaryBlockToRowBlockConverter (org.apache.sysml.runtime.matrix.data.BinaryBlockToRowBlockConverter)2 BinaryBlockToTextCellConverter (org.apache.sysml.runtime.matrix.data.BinaryBlockToTextCellConverter)2 BinaryCellToRowBlockConverter (org.apache.sysml.runtime.matrix.data.BinaryCellToRowBlockConverter)2 BinaryCellToTextConverter (org.apache.sysml.runtime.matrix.data.BinaryCellToTextConverter)2