Search in sources :

Example 6 with IvaratorCacheDir

Use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

The following example shows the persistCompactReloadTest method of the HdfsBackedSortedSetTest class.

@Test
public void persistCompactReloadTest() throws Exception {
    File baseDir = temporaryFolder.newFolder();
    File[] folders = new File[] { new File(baseDir, "first"), new File(baseDir, "second"), new File(baseDir, "third") };
    for (File folder : folders) {
        Assert.assertTrue(folder.mkdirs());
    }
    String uniquePath = "blah";
    // Hadoop Path for each folder, and the unique sub-path beneath each one
    Path[] dirPaths = new Path[folders.length];
    Path[] uniqueSubPaths = new Path[folders.length];
    for (int i = 0; i < folders.length; i++) {
        dirPaths[i] = new Path(folders[i].toURI().toString());
        uniqueSubPaths[i] = new Path(dirPaths[i], uniquePath);
    }
    LocalFileSystem fileSystem = new LocalFileSystem();
    fileSystem.initialize(baseDir.toURI(), new Configuration());
    // set the min remaining percent to something which will cause the second directory to be skipped
    double minRemainingPercent = 1.0;
    List<IvaratorCacheDir> cacheDirList = new ArrayList<>();
    for (File folder : folders) {
        String uri = folder.toURI().toString();
        // the "second" folder gets the restrictive config so writes to it are skipped
        IvaratorCacheDirConfig config = folder.getName().equalsIgnoreCase("second")
                        ? new IvaratorCacheDirConfig(uri, 0, minRemainingPercent)
                        : new IvaratorCacheDirConfig(uri, 1);
        cacheDirList.add(new IvaratorCacheDir(config, fileSystem, uri));
    }
    // Persist a single entry into a set backed only by the first folder
    HdfsBackedSortedSet<String> setOne = new HdfsBackedSortedSet<>(Collections.singletonList(cacheDirList.get(0)), uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    String someTestString = "some test string";
    setOne.add(someTestString);
    setOne.persist();
    // Persist a single entry into a set backed only by the third folder
    HdfsBackedSortedSet<String> setThree = new HdfsBackedSortedSet<>(Collections.singletonList(cacheDirList.get(2)), uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    String anotherTestString = "another test string";
    setThree.add(anotherTestString);
    setThree.persist();
    // data should have landed in the first and third folders...
    Assert.assertTrue(fileSystem.exists(uniqueSubPaths[0]));
    Assert.assertTrue(fileSystem.exists(uniqueSubPaths[2]));
    // ...but not in the (skipped) second folder
    Assert.assertFalse(fileSystem.exists(uniqueSubPaths[1]));
    Assert.assertEquals(0, fileSystem.listStatus(dirPaths[1]).length);
    // exactly one SortedSet file was written to the first folder
    FileStatus[] statuses = fileSystem.listStatus(uniqueSubPaths[0]);
    Assert.assertEquals(1, statuses.length);
    Assert.assertTrue(statuses[0].getPath().getName().startsWith("SortedSet"));
    // exactly one SortedSet file was written to the third folder
    statuses = fileSystem.listStatus(uniqueSubPaths[2]);
    Assert.assertEquals(1, statuses.length);
    Assert.assertTrue(statuses[0].getPath().getName().startsWith("SortedSet"));
    // Reload across all cache dirs; maxOpenFiles of 1 forces a compaction on the next persist
    HdfsBackedSortedSet<String> reloaded = new HdfsBackedSortedSet<>(cacheDirList, uniquePath, 1, 2, new FileSortedSet.PersistOptions());
    // both previously persisted entries should be visible
    Assert.assertEquals(2, reloaded.size());
    // remove each entry we iterate over from the expected list; it should end up empty
    List<String> expected = new ArrayList<>();
    expected.add(someTestString);
    expected.add(anotherTestString);
    for (String entry : reloaded) {
        expected.remove(entry);
    }
    Assert.assertTrue(expected.isEmpty());
    // Add one more entry, then persist (which should compact everything down to 1 file)
    String lastTestString = "last test string";
    reloaded.add(lastTestString);
    reloaded.persist();
    // the second folder is still untouched
    Assert.assertFalse(fileSystem.exists(uniqueSubPaths[1]));
    Assert.assertEquals(0, fileSystem.listStatus(dirPaths[1]).length);
    // the third folder still exists but its data was compacted away
    Assert.assertTrue(fileSystem.exists(uniqueSubPaths[2]));
    Assert.assertEquals(0, fileSystem.listStatus(uniqueSubPaths[2]).length);
    // all data now lives in a single file under the first folder
    statuses = fileSystem.listStatus(uniqueSubPaths[0]);
    Assert.assertEquals(1, statuses.length);
    Assert.assertTrue(statuses[0].getPath().getName().startsWith("SortedSet"));
    // Finally, the compacted data should reload with all three entries
    HdfsBackedSortedSet<String> compacted = new HdfsBackedSortedSet<>(cacheDirList, uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    Assert.assertEquals(3, compacted.size());
    // same drain-the-expected-list check as above, now with three entries
    expected.clear();
    expected.add(someTestString);
    expected.add(anotherTestString);
    expected.add(lastTestString);
    for (String entry : compacted) {
        expected.remove(entry);
    }
    Assert.assertTrue(expected.isEmpty());
}
Also used : Path(org.apache.hadoop.fs.Path) Arrays(java.util.Arrays) Test(org.junit.Test) FileStatus(org.apache.hadoop.fs.FileStatus) File(java.io.File) ArrayList(java.util.ArrayList) FsStatus(org.apache.hadoop.fs.FsStatus) List(java.util.List) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) Rule(org.junit.Rule) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) Assert(org.junit.Assert) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) File(java.io.File) Test(org.junit.Test)

Aggregations

IvaratorCacheDir (datawave.query.iterator.ivarator.IvaratorCacheDir)6 IvaratorCacheDirConfig (datawave.query.iterator.ivarator.IvaratorCacheDirConfig)5 Path (org.apache.hadoop.fs.Path)4 File (java.io.File)3 ArrayList (java.util.ArrayList)3 Configuration (org.apache.hadoop.conf.Configuration)3 IOException (java.io.IOException)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 FsStatus (org.apache.hadoop.fs.FsStatus)2 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)2 Test (org.junit.Test)2 URI (java.net.URI)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Text (org.apache.hadoop.io.Text)1 Assert (org.junit.Assert)1 Before (org.junit.Before)1 Rule (org.junit.Rule)1