Search in sources :

Example 1 with DataFileChunk

use of voldemort.store.readonly.chunk.DataFileChunk in project voldemort by voldemort.

In the class HadoopStoreBuilderUtils, method getDataFileChunkSet.

/**
 * Converts an array of data chunk files into a {@link DataFileChunkSet},
 * ordered by ascending chunk id. The input to this is generally the output
 * of the getChunkFiles function.
 *
 * Works only for {@link ReadOnlyStorageFormat#READONLY_V2}
 *
 * @param fs Filesystem used
 * @param files Array of data chunk files
 * @return DataFileChunkSet Returns the corresponding data chunk set
 * @throws IOException declared for filesystem access; presumably raised
 *         while creating the HDFS-backed chunks
 * @throws VoldemortException if a file name does not match the
 *         READONLY_V2 naming format, or if a file is too large for its
 *         size to be represented as an int
 */
public static DataFileChunkSet getDataFileChunkSet(FileSystem fs, FileStatus[] files) throws IOException {
    // Make sure it satisfies the partitionId_replicaType format
    List<FileStatus> fileList = Lists.newArrayList();
    for (FileStatus file : files) {
        if (!ReadOnlyUtils.isFormatCorrect(file.getPath().getName(), ReadOnlyStorageFormat.READONLY_V2)) {
            throw new VoldemortException("Incorrect data file name format for " + file.getPath().getName() + ". Unsupported by " + ReadOnlyStorageFormat.READONLY_V2);
        }
        fileList.add(file);
    }
    // Return it in sorted order
    Collections.sort(fileList, new Comparator<FileStatus>() {

        public int compare(FileStatus f1, FileStatus f2) {
            int chunkId1 = ReadOnlyUtils.getChunkId(f1.getPath().getName());
            int chunkId2 = ReadOnlyUtils.getChunkId(f2.getPath().getName());
            // Explicit comparison instead of subtraction: a difference of
            // two ints can overflow and report the wrong ordering.
            if (chunkId1 < chunkId2) {
                return -1;
            }
            return (chunkId1 == chunkId2) ? 0 : 1;
        }
    });
    List<DataFileChunk> dataFiles = Lists.newArrayList();
    List<Integer> dataFileSizes = Lists.newArrayList();
    for (FileStatus file : fileList) {
        long length = file.getLen();
        // Sizes are stored as ints; a blind (int) cast would silently wrap
        // for files larger than Integer.MAX_VALUE and record a bogus size.
        if (length > Integer.MAX_VALUE) {
            throw new VoldemortException("Data file " + file.getPath().getName() + " is too large (" + length + " bytes). Maximum supported size is " + Integer.MAX_VALUE + " bytes");
        }
        dataFiles.add(new HdfsDataFileChunk(fs, file));
        dataFileSizes.add((int) length);
    }
    return new DataFileChunkSet(dataFiles, dataFileSizes);
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) DataFileChunk(voldemort.store.readonly.chunk.DataFileChunk) DataFileChunkSet(voldemort.store.readonly.chunk.DataFileChunkSet) VoldemortException(voldemort.VoldemortException)

Aggregations

FileStatus (org.apache.hadoop.fs.FileStatus)1 VoldemortException (voldemort.VoldemortException)1 DataFileChunk (voldemort.store.readonly.chunk.DataFileChunk)1 DataFileChunkSet (voldemort.store.readonly.chunk.DataFileChunkSet)1