Use of voldemort.store.readonly.chunk.DataFileChunkSet in project voldemort by voldemort.
In the class HadoopStoreBuilderUtilsTest, the method testGetDataFileChunkSet:
@Test
public void testGetDataFileChunkSet() throws IOException {
    Path headPath = new Path(TestUtils.createTempDir().getAbsolutePath());
    Path testPath = new Path(headPath, "0_0_100.data");
    Path junkPath = new Path(headPath, "1_1_100.data");
    FileSystem fs = testPath.getFileSystem(new Configuration());

    // 1) Just one correct file. The junk file belongs to partition 1 /
    // replica 1, so getDataChunkFiles(fs, headPath, 0, 0) filters it out.
    fs.create(testPath);
    fs.create(junkPath);
    writeRandomData(testPath, 100);
    DataFileChunkSet set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs,
                                                                       HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 1);
    assertEquals(set.getDataFileSize(0), 100);

    // 2) Another correct file
    testPath = new Path(headPath, "0_0_99.data");
    fs.create(testPath);
    writeRandomData(testPath, 99);
    set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs,
                                                      HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 2);
    assertEquals(set.getDataFileSize(0), 99);
    assertEquals(set.getDataFileSize(1), 100);

    // 3) Add some more files. Each file's size equals its chunk id, and the
    // chunk set returns the files sorted by chunk id.
    testPath = new Path(headPath, "0_0_1.data");
    fs.create(testPath);
    writeRandomData(testPath, 1);
    testPath = new Path(headPath, "0_0_10.data");
    fs.create(testPath);
    writeRandomData(testPath, 10);
    testPath = new Path(headPath, "0_0_999.data");
    fs.create(testPath);
    writeRandomData(testPath, 999);
    testPath = new Path(headPath, "0_0_101.data");
    fs.create(testPath);
    writeRandomData(testPath, 101);
    testPath = new Path(headPath, "0_0_1000.data");
    fs.create(testPath);
    writeRandomData(testPath, 1000);
    set = HadoopStoreBuilderUtils.getDataFileChunkSet(fs,
                                                      HadoopStoreBuilderUtils.getDataChunkFiles(fs, headPath, 0, 0));
    assertEquals(set.getNumChunks(), 7);
    assertEquals(set.getDataFileSize(0), 1);
    assertEquals(set.getDataFileSize(1), 10);
    assertEquals(set.getDataFileSize(2), 99);
    assertEquals(set.getDataFileSize(3), 100);
    assertEquals(set.getDataFileSize(4), 101);
    assertEquals(set.getDataFileSize(5), 999);
    assertEquals(set.getDataFileSize(6), 1000);
}
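The test relies on a writeRandomData helper that this page does not show. A minimal sketch of what such a helper might look like, assuming a TestUtils.randomBytes utility for generating the payload; the real helper lives elsewhere in HadoopStoreBuilderUtilsTest and may differ:

// Hypothetical sketch of the writeRandomData helper used above; treat the
// body as an illustration, not the project's actual code.
private byte[] writeRandomData(Path path, int size) throws IOException {
    // Assumes TestUtils.randomBytes(int) produces a random payload of the given size
    byte[] randomBytes = TestUtils.randomBytes(size);
    FileSystem fs = path.getFileSystem(new Configuration());
    // Overwrites the empty file created by fs.create(testPath) in the test
    FSDataOutputStream stream = fs.create(path);
    stream.write(randomBytes);
    stream.close();
    return randomBytes;
}

This explains why the test's size assertions hold: each call writes exactly size bytes, so getDataFileSize reports the same number that was passed to the helper.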
Use of voldemort.store.readonly.chunk.DataFileChunkSet in project voldemort by voldemort.
In the class HadoopStoreBuilderUtils, the method getDataFileChunkSet:
/**
 * Converts an array of FileStatus files to a DataFileChunkSet. The input is
 * generally the output of the getDataChunkFiles function.
 *
 * Works only for {@link ReadOnlyStorageFormat.READONLY_V2}
 *
 * @param fs Filesystem used
 * @param files List of data chunk files
 * @return DataFileChunkSet The corresponding data chunk set
 * @throws IOException
 */
public static DataFileChunkSet getDataFileChunkSet(FileSystem fs, FileStatus[] files)
        throws IOException {
    // Make sure every file satisfies the partitionId_replicaType_chunkId
    // naming format expected by READONLY_V2
    List<FileStatus> fileList = Lists.newArrayList();
    for(FileStatus file: files) {
        if(!ReadOnlyUtils.isFormatCorrect(file.getPath().getName(),
                                          ReadOnlyStorageFormat.READONLY_V2)) {
            throw new VoldemortException("Incorrect data file name format for "
                                         + file.getPath().getName() + ". Unsupported by "
                                         + ReadOnlyStorageFormat.READONLY_V2);
        }
        fileList.add(file);
    }

    // Return the chunks in sorted order (ascending chunk id)
    Collections.sort(fileList, new Comparator<FileStatus>() {

        public int compare(FileStatus f1, FileStatus f2) {
            int chunkId1 = ReadOnlyUtils.getChunkId(f1.getPath().getName());
            int chunkId2 = ReadOnlyUtils.getChunkId(f2.getPath().getName());
            return chunkId1 - chunkId2;
        }
    });

    List<DataFileChunk> dataFiles = Lists.newArrayList();
    List<Integer> dataFileSizes = Lists.newArrayList();
    for(FileStatus file: fileList) {
        dataFiles.add(new HdfsDataFileChunk(fs, file));
        dataFileSizes.add((int) file.getLen());
    }
    return new DataFileChunkSet(dataFiles, dataFileSizes);
}
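The comparator delegates to ReadOnlyUtils.getChunkId, which is not shown on this page. Under the READONLY_V2 naming convention (partitionId_replicaType_chunkId.data), one plausible sketch of the extraction is the following; the real implementation in voldemort.store.readonly.ReadOnlyUtils may differ:

// Illustrative sketch only, assuming file names of the form
// "partitionId_replicaType_chunkId.data" (e.g. "0_0_100.data").
public static int getChunkId(String fileName) {
    // Strip the extension ("0_0_100.data" -> "0_0_100") ...
    String base = fileName.substring(0, fileName.lastIndexOf('.'));
    // ... and take the third underscore-separated token as the chunk id
    return Integer.parseInt(base.split("_")[2]);
}

With this convention, the files created in the test above sort as chunks 1, 10, 99, 100, 101, 999 and 1000, which is exactly the order the size assertions check, since each file was written with as many bytes as its chunk id.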