Example 1 with DataFileChunkSet

Use of voldemort.store.readonly.chunk.DataFileChunkSet in the voldemort project.

From the class HadoopStoreBuilderUtilsTest, the method testGetDataFileChunkSet:

@Test
public void testGetDataFileChunkSet() throws IOException {
    Path headPath = new Path(TestUtils.createTempDir().getAbsolutePath());
    Path testPath = new Path(headPath, "0_0_100.data");
    // A chunk file for a different partition/replica; it must be ignored below
    Path junkPath = new Path(headPath, "1_1_100.data");
    FileSystem fs = testPath.getFileSystem(new Configuration());
    // 1) Just one correct file
    fs.create(testPath);
    fs.create(junkPath);
    // Test helper: fills the file with the given number of random bytes
    writeRandomData(testPath, 100);
    DataFileChunkSet set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs, HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 1);
    assertEquals(set.getDataFileSize(0), 100);
    // 2) Another correct file
    testPath = new Path(headPath, "0_0_99.data");
    fs.create(testPath);
    writeRandomData(testPath, 99);
    set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs, HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 2);
    assertEquals(set.getDataFileSize(0), 99);
    assertEquals(set.getDataFileSize(1), 100);
    // 3) Add some more files
    testPath = new Path(headPath, "0_0_1.data");
    fs.create(testPath);
    writeRandomData(testPath, 1);
    testPath = new Path(headPath, "0_0_10.data");
    fs.create(testPath);
    writeRandomData(testPath, 10);
    testPath = new Path(headPath, "0_0_999.data");
    fs.create(testPath);
    writeRandomData(testPath, 999);
    testPath = new Path(headPath, "0_0_101.data");
    fs.create(testPath);
    writeRandomData(testPath, 101);
    testPath = new Path(headPath, "0_0_1000.data");
    fs.create(testPath);
    writeRandomData(testPath, 1000);
    set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs, HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 7);
    assertEquals(set.getDataFileSize(0), 1);
    assertEquals(set.getDataFileSize(1), 10);
    assertEquals(set.getDataFileSize(2), 99);
    assertEquals(set.getDataFileSize(3), 100);
    assertEquals(set.getDataFileSize(4), 101);
    assertEquals(set.getDataFileSize(5), 999);
    assertEquals(set.getDataFileSize(6), 1000);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), DataFileChunkSet (voldemort.store.readonly.chunk.DataFileChunkSet), Test (org.junit.Test)
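
The test above relies on the READONLY_V2 chunk file naming convention, partitionId_replicaType_chunkId.data, so "0_0_100.data" is chunk 100 of partition 0, replica type 0, and the junk file "1_1_100.data" is skipped because it belongs to a different partition. Below is a minimal sketch of how such a name decomposes; the ChunkFileName class and its parse method are illustrative assumptions, not Voldemort's ReadOnlyUtils implementation.

// Illustrative only: Voldemort itself parses these names via ReadOnlyUtils,
// whose internals are not shown here.
public final class ChunkFileName {

    public final int partitionId;
    public final int replicaType;
    public final int chunkId;

    private ChunkFileName(int partitionId, int replicaType, int chunkId) {
        this.partitionId = partitionId;
        this.replicaType = replicaType;
        this.chunkId = chunkId;
    }

    // Parses names such as "0_0_100.data" into their three numeric components
    public static ChunkFileName parse(String fileName) {
        if (!fileName.endsWith(".data"))
            throw new IllegalArgumentException("Not a data chunk file: " + fileName);
        String[] parts = fileName.substring(0, fileName.length() - ".data".length()).split("_");
        if (parts.length != 3)
            throw new IllegalArgumentException("Expected partitionId_replicaType_chunkId.data: " + fileName);
        return new ChunkFileName(Integer.parseInt(parts[0]),
                                 Integer.parseInt(parts[1]),
                                 Integer.parseInt(parts[2]));
    }
}

Sorting by the parsed chunk id yields exactly the order asserted in step 3 of the test: 1, 10, 99, 100, 101, 999, 1000.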

Example 2 with DataFileChunkSet

Use of voldemort.store.readonly.chunk.DataFileChunkSet in the voldemort project.

From the class HadoopStoreBuilderUtils, the method getDataFileChunkSet:

/**
     * Converts an array of FileStatus entries to a DataFileChunkSet. The
     * input is typically the output of the getDataChunkFiles function.
     * 
     * Works only for {@link ReadOnlyStorageFormat#READONLY_V2}
     * 
     * @param fs Filesystem used
     * @param files List of data chunk files
     * @return The corresponding data chunk set
     * @throws IOException if the chunk files cannot be listed or read
     */
public static DataFileChunkSet getDataFileChunkSet(FileSystem fs, FileStatus[] files) throws IOException {
    // Make sure every file satisfies the partitionId_replicaType_chunkId.data format
    List<FileStatus> fileList = Lists.newArrayList();
    for (FileStatus file : files) {
        if (!ReadOnlyUtils.isFormatCorrect(file.getPath().getName(), ReadOnlyStorageFormat.READONLY_V2)) {
            throw new VoldemortException("Incorrect data file name format for " + file.getPath().getName() + ". Unsupported by " + ReadOnlyStorageFormat.READONLY_V2);
        }
        fileList.add(file);
    }
    // Return the chunks in ascending chunk-id order
    Collections.sort(fileList, new Comparator<FileStatus>() {

        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            // Order by the chunk id embedded in the file name
            int chunkId1 = ReadOnlyUtils.getChunkId(f1.getPath().getName());
            int chunkId2 = ReadOnlyUtils.getChunkId(f2.getPath().getName());
            return chunkId1 - chunkId2;
        }
    });
    List<DataFileChunk> dataFiles = Lists.newArrayList();
    List<Integer> dataFileSizes = Lists.newArrayList();
    for (FileStatus file : fileList) {
        dataFiles.add(new HdfsDataFileChunk(fs, file));
        // Note: lengths are narrowed to int, so each chunk is assumed to be under 2 GB
        dataFileSizes.add((int) file.getLen());
    }
    return new DataFileChunkSet(dataFiles, dataFileSizes);
}
Also used: FileStatus (org.apache.hadoop.fs.FileStatus), DataFileChunk (voldemort.store.readonly.chunk.DataFileChunk), DataFileChunkSet (voldemort.store.readonly.chunk.DataFileChunkSet), VoldemortException (voldemort.VoldemortException)
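
To round out Example 2, here is a hedged usage sketch that lists the partition 0, replica 0 chunk files under a store directory and wraps them in a DataFileChunkSet, mirroring the calls made in the test of Example 1. The storeDir location and the printChunkSizes method name are assumptions for illustration.

// Hedged sketch: build a DataFileChunkSet for partition 0, replica type 0.
// The HadoopStoreBuilderUtils calls match the signatures used in Example 1.
public static void printChunkSizes() throws IOException {
    Configuration conf = new Configuration();
    // Assumed location of the READONLY_V2 chunk files
    Path storeDir = new Path("/tmp/voldemort-store");
    FileSystem fs = storeDir.getFileSystem(conf);

    FileStatus[] chunkFiles = HadoopStoreBuilderUtils.getDataChunkFiles(fs, storeDir, 0, 0);
    DataFileChunkSet chunkSet = HadoopStoreBuilderUtils.getDataFileChunkSet(fs, chunkFiles);

    // Chunks come back in ascending chunk-id order, as enforced by the comparator above
    for (int chunk = 0; chunk < chunkSet.getNumChunks(); chunk++) {
        System.out.println("chunk " + chunk + ": " + chunkSet.getDataFileSize(chunk) + " bytes");
    }
}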

Aggregations

DataFileChunkSet (voldemort.store.readonly.chunk.DataFileChunkSet): 2 uses
Configuration (org.apache.hadoop.conf.Configuration): 1 use
FileStatus (org.apache.hadoop.fs.FileStatus): 1 use
FileSystem (org.apache.hadoop.fs.FileSystem): 1 use
Path (org.apache.hadoop.fs.Path): 1 use
Test (org.junit.Test): 1 use
VoldemortException (voldemort.VoldemortException): 1 use
DataFileChunk (voldemort.store.readonly.chunk.DataFileChunk): 1 use