
Example 1 with ReadWithZeros

Use of org.apache.sysml.runtime.matrix.sort.ReadWithZeros in project incubator-systemml by apache.

In the class MapReduceTool, method pickValueWeight:

public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p, boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i];
    long total = ranges[ranges.length - 1];
    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);
    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;
    Path path = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files) {
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }
    }
    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);
    int buffsz = 64 * 1024;
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();
    FSDataInputStream currentStream = null;
    double ret = -1;
    try {
        currentStream = fs.open(fileToRead, buffsz);
        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);
        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }
        ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(currentStream);
    }
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), DoubleWritable (org.apache.hadoop.io.DoubleWritable), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), ReadWithZeros (org.apache.sysml.runtime.matrix.sort.ReadWithZeros), IntWritable (org.apache.hadoop.io.IntWritable)
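For orientation, pickValueWeight reads one partition of the sorted output and returns { picked value, its weight, cumulative weight }, with the last two set to -1 when the two middle values were averaged. The sketch below is a minimal, hypothetical call that picks the weighted median (p = 0.5); the directory path, the metadata variable, and the import paths are assumptions, not taken from the example above.

import java.io.IOException;
import org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer;
import org.apache.sysml.runtime.util.MapReduceTool;

public class WeightedMedianSketch {
    // Hypothetical helper: given the sort-output directory and the per-reducer
    // item-count metadata produced by the preceding sort job, pick the weighted
    // median. The package names above are assumed from the SystemML source layout.
    public static double weightedMedian(String sortedDir, MetaDataNumItemsByEachReducer meta)
            throws IOException {
        double[] res = MapReduceTool.pickValueWeight(sortedDir, meta, 0.5, true);
        // res = { value, weight, cumulative weight }; weight and cumulative weight
        // are -1 here because averaging may have been applied for an even total weight.
        return res[0];
    }
}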

Example 2 with ReadWithZeros

Use of org.apache.sysml.runtime.matrix.sort.ReadWithZeros in project incubator-systemml by apache.

In the class MapReduceTool, method pickValueWeight:

public static double[] pickValueWeight(String dir, NumItemsByEachReducerMetaData metadata, double p, boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i];
    long total = ranges[ranges.length - 1];
    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);
    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;
    Path path = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files) {
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }
    }
    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);
    int buffsz = 64 * 1024;
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();
    FSDataInputStream currentStream = null;
    double ret = -1;
    try {
        currentStream = fs.open(fileToRead, buffsz);
        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);
        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }
        ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(currentStream);
    }
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), DoubleWritable (org.apache.hadoop.io.DoubleWritable), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), ReadWithZeros (org.apache.sysml.runtime.matrix.sort.ReadWithZeros), IntWritable (org.apache.hadoop.io.IntWritable)
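The position arithmetic shared by all of these examples (cumulative ranges per reducer, the 1-based global pick position pos = ceil(total * p), and the 0-based offset within the chosen part) can be traced in isolation. The following standalone snippet is a didactic rewrite of just that arithmetic on a small count array, not code from MapReduceTool.

public class PickPositionDemo {
    public static void main(String[] args) {
        long[] counts = { 4, 3, 5 };                // items written by reducers 0, 1, 2
        double p = 0.5;                             // quantile to pick (median)

        // Cumulative ranges: part 0 covers positions 1..4, part 1 covers 5..7, part 2 covers 8..12.
        long[] ranges = new long[counts.length];
        ranges[0] = counts[0];
        for (int i = 1; i < counts.length; i++)
            ranges[i] = ranges[i - 1] + counts[i];
        long total = ranges[ranges.length - 1];     // 12 items overall

        long pos = (long) Math.ceil(total * p);     // 1-based global position: 6
        int part = 0;
        while (ranges[part] < pos)
            part++;                                 // position 6 falls into part 1
        int offset = (part > 0) ? (int) (pos - ranges[part - 1] - 1) : (int) pos - 1;

        System.out.println("part=" + part + ", offset=" + offset);  // prints part=1, offset=1
    }
}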

Example 3 with ReadWithZeros

Use of org.apache.sysml.runtime.matrix.sort.ReadWithZeros in project systemml by apache.

In the class MapReduceTool, method pickValueWeight:

public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p, boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++) ranges[i] = ranges[i - 1] + counts[i];
    long total = ranges[ranges.length - 1];
    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);
    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;
    Path path = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files) {
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }
    }
    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);
    int buffsz = 64 * 1024;
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();
    FSDataInputStream currentStream = null;
    double ret = -1;
    try {
        currentStream = fs.open(fileToRead, buffsz);
        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);
        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }
        ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(currentStream);
    }
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), DoubleWritable (org.apache.hadoop.io.DoubleWritable), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), ReadWithZeros (org.apache.sysml.runtime.matrix.sort.ReadWithZeros), IntWritable (org.apache.hadoop.io.IntWritable)
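The only ReadWithZeros API exercised in these examples is the constructor (input stream, whether this partition contains the run of zeros, number of zeros) and readNextKeyValuePairs, which fills a DoubleWritable value and an IntWritable weight on each call. A minimal, hypothetical reader loop built from just those two calls could look like the sketch below; the partition path, the plain-Hadoop FileSystem lookup, and the class and method names are assumptions for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.sysml.runtime.matrix.sort.ReadWithZeros;

public class ReadWithZerosSketch {
    // Hypothetical sketch: print the first n (value, weight) pairs of one sort
    // partition. containsZeros and numZeros mirror the constructor arguments used
    // in pickValueWeight; the caller is assumed to know whether this partition
    // holds the zero run and how many zeros it represents.
    public static void dumpFirstPairs(Path partFile, boolean containsZeros, long numZeros, int n)
            throws IOException {
        FileSystem fs = partFile.getFileSystem(new Configuration());
        DoubleWritable value = new DoubleWritable();
        IntWritable weight = new IntWritable();
        try (FSDataInputStream in = fs.open(partFile)) {
            ReadWithZeros reader = new ReadWithZeros(in, containsZeros, numZeros);
            for (int i = 0; i < n; i++) {
                reader.readNextKeyValuePairs(value, weight);
                System.out.println(value.get() + " x " + weight.get());
            }
        }
    }
}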

Aggregations

FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 3 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 3 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 3 uses
Path (org.apache.hadoop.fs.Path): 3 uses
DoubleWritable (org.apache.hadoop.io.DoubleWritable): 3 uses
IntWritable (org.apache.hadoop.io.IntWritable): 3 uses
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 3 uses
ReadWithZeros (org.apache.sysml.runtime.matrix.sort.ReadWithZeros): 3 uses