Search in sources :

Example 1 with MatrixPartitionMeta

use of com.tencent.angel.model.output.format.MatrixPartitionMeta in project angel by Tencent.

the class AMMatrixMetaManager method loadPartitionInfoFromHDFS.

/**
 * Load the partition metadata of a matrix from its meta file on HDFS.
 *
 * <p>The meta file is resolved as
 * {@code <path>/<matrix name>/<ModelFilesConstent.modelMetaFileName>}. Every partition recorded
 * in that file is converted to a {@link PartitionMeta} carrying a matrix id that is allocated
 * from {@code maxMatrixId} while holding {@code writeLock}.
 *
 * @param path the parent directory under which the matrix directory is looked up
 * @param matrixContext the matrix context; its name selects the matrix directory
 * @param conf the configuration used to resolve the file system of the meta file
 * @return matrix partitions
 * @throws IOException if the meta file does not exist or cannot be read
 */
private List<PartitionMeta> loadPartitionInfoFromHDFS(String path, MatrixContext matrixContext, Configuration conf) throws IOException {
    Path meteFilePath = new Path(new Path(path, matrixContext.getName()), ModelFilesConstent.modelMetaFileName);
    MatrixFilesMeta meta = new MatrixFilesMeta();
    FileSystem fs = meteFilePath.getFileSystem(conf);
    LOG.info("Load matrix meta for matrix " + matrixContext.getName());
    if (!fs.exists(meteFilePath)) {
        // Name the missing file so the failure can be diagnosed from the message alone.
        throw new IOException("matrix meta file does not exist " + meteFilePath);
    }
    FSDataInputStream input = fs.open(meteFilePath);
    try {
        meta.read(input);
    } catch (Throwable e) {
        throw new IOException("Read meta failed ", e);
    } finally {
        input.close();
    }
    Map<Integer, MatrixPartitionMeta> partMetas = meta.getPartMetas();
    List<PartitionMeta> matrixPartitions = new ArrayList<>(partMetas.size());
    int matrixId;
    // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally
    // clause must not attempt to unlock a lock this thread does not hold.
    writeLock.lock();
    try {
        matrixId = maxMatrixId++;
    } finally {
        writeLock.unlock();
    }
    for (Map.Entry<Integer, MatrixPartitionMeta> partMetaEntry : partMetas.entrySet()) {
        MatrixPartitionMeta partMeta = partMetaEntry.getValue();
        matrixPartitions.add(new PartitionMeta(matrixId, partMetaEntry.getKey(), partMeta.getStartRow(), partMeta.getEndRow(), partMeta.getStartCol(), partMeta.getEndCol()));
    }
    return matrixPartitions;
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixFilesMeta(com.tencent.angel.model.output.format.MatrixFilesMeta) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 2 with MatrixPartitionMeta

use of com.tencent.angel.model.output.format.MatrixPartitionMeta in project angel by Tencent.

the class ModelConverter method groupByPartitions.

/**
 * Group partition metas by the file they are stored in and order each group by file offset.
 *
 * @param partMetas partition id to partition meta
 * @return one list per distinct file name, each sorted by ascending offset; the order of the
 *         groups themselves follows the iteration order of a {@link HashMap} and is unspecified
 */
private static List<List<MatrixPartitionMeta>> groupByPartitions(Map<Integer, MatrixPartitionMeta> partMetas) {
    List<List<MatrixPartitionMeta>> ret = new ArrayList<>();
    HashMap<String, List<MatrixPartitionMeta>> fileNameToPartsMap = new HashMap<>();
    for (MatrixPartitionMeta partMeta : partMetas.values()) {
        String fileName = partMeta.getFileName();
        List<MatrixPartitionMeta> modelParts = fileNameToPartsMap.get(fileName);
        if (modelParts == null) {
            modelParts = new ArrayList<>();
            fileNameToPartsMap.put(fileName, modelParts);
        }
        modelParts.add(partMeta);
    }
    for (List<MatrixPartitionMeta> partList : fileNameToPartsMap.values()) {
        Collections.sort(partList, new Comparator<MatrixPartitionMeta>() {

            @Override
            public int compare(MatrixPartitionMeta part1, MatrixPartitionMeta part2) {
                // Compare instead of subtract-and-cast: narrowing a difference of offsets to
                // int can flip its sign (or yield 0) once offsets differ by 2^31 or more.
                return Long.compare(part1.getOffset(), part2.getOffset());
            }
        });
        ret.add(partList);
    }
    return ret;
}
Also used : MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta)

Example 3 with MatrixPartitionMeta

use of com.tencent.angel.model.output.format.MatrixPartitionMeta in project angel by Tencent.

the class ModelConverter method convertPartitions.

/**
 * Convert the given partitions of one model file and write the result to a file of the same
 * name under the converted-model path.
 *
 * <p>The file name is taken from the first entry of {@code partMetas}; the remaining entries
 * are presumably stored in that same file (the caller is expected to group them accordingly).
 * For each entry the partition meta is looked up again in {@code modelMeta} by partition id,
 * the input is positioned at that partition's offset, and {@code convertPartition} is invoked.
 *
 * @param modelPath directory of the source model file
 * @param modelFs file system used to open the source file
 * @param convertedModelPath directory of the converted file
 * @param convertedModelFs file system used to create the converted file
 * @param lineConvert converter passed through to {@code convertPartition}
 * @param partMetas partitions to convert; nothing is done when null or empty
 * @param modelMeta matrix files meta used to resolve each partition meta by id
 * @throws IOException if opening, creating, seeking or converting fails
 */
private static void convertPartitions(Path modelPath, FileSystem modelFs, Path convertedModelPath, FileSystem convertedModelFs, ModelLineConvert lineConvert, List<MatrixPartitionMeta> partMetas, MatrixFilesMeta modelMeta) throws IOException {
    if (partMetas == null || partMetas.isEmpty()) {
        return;
    }
    String fileName = partMetas.get(0).getFileName();
    int size = partMetas.size();
    LOG.info("start to convert partitions in file " + fileName + ", partition number is " + size);
    // try-with-resources guarantees both streams are closed even when creating the output,
    // seeking, or converting a partition throws.
    try (FSDataInputStream input = modelFs.open(new Path(modelPath, fileName));
        FSDataOutputStream output = convertedModelFs.create(new Path(convertedModelPath, fileName))) {
        for (int i = 0; i < size; i++) {
            MatrixPartitionMeta partMeta = modelMeta.getPartMeta(partMetas.get(i).getPartId());
            input.seek(partMeta.getOffset());
            convertPartition(input, output, lineConvert, partMeta, modelMeta);
        }
    }
}
Also used : MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta)

Example 4 with MatrixPartitionMeta

use of com.tencent.angel.model.output.format.MatrixPartitionMeta in project angel by Tencent.

the class ModelLoader method loadPartitions.

/**
 * Load the partitions {@code partitionIds[startPos .. endPos)} into the model.
 *
 * <p>For each partition the meta is resolved from {@code meta}, the stream is positioned at the
 * partition's offset, and {@code loadPartition} is invoked. The input stream is reopened only
 * when the file name differs from that of the previously processed partition.
 *
 * @param model the model passed through to {@code loadPartition}
 * @param matrixPath directory containing the partition files
 * @param fs file system used to open the partition files
 * @param partitionIds partition ids; only indices in [startPos, endPos) are read
 * @param startPos first index (inclusive) into {@code partitionIds}
 * @param endPos last index (exclusive) into {@code partitionIds}
 * @param meta matrix files meta used to resolve each partition meta by id
 * @throws IOException if opening, seeking or loading fails
 */
private static void loadPartitions(Model model, Path matrixPath, FileSystem fs, List<Integer> partitionIds, int startPos, int endPos, MatrixFilesMeta meta) throws IOException {
    FSDataInputStream input = null;
    String currentFileName = "";
    try {
        for (int i = startPos; i < endPos; i++) {
            MatrixPartitionMeta partMeta = meta.getPartMeta(partitionIds.get(i));
            String fileName = partMeta.getFileName();
            if (!fileName.equals(currentFileName)) {
                currentFileName = fileName;
                if (input != null) {
                    input.close();
                    // Drop the reference so a failing open() below does not lead to a
                    // second close() of the same stream in the finally clause.
                    input = null;
                }
                input = fs.open(new Path(matrixPath, currentFileName));
            }
            input.seek(partMeta.getOffset());
            loadPartition(model, input, partMeta);
        }
    } finally {
        // Close the last opened stream on both the normal and the exceptional path.
        if (input != null) {
            input.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixPartitionMeta(com.tencent.angel.model.output.format.MatrixPartitionMeta) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)

Aggregations

MatrixPartitionMeta (com.tencent.angel.model.output.format.MatrixPartitionMeta)4 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)2 Path (org.apache.hadoop.fs.Path)2 MatrixFilesMeta (com.tencent.angel.model.output.format.MatrixFilesMeta)1 IOException (java.io.IOException)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 FileSystem (org.apache.hadoop.fs.FileSystem)1