Search in sources :

Example 1 with IvaratorCacheDir

use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

From the class DatawaveFieldIndexIteratorJexlTest, the setup method:

@Before
public void setup() throws IOException {
    // Root cache directory for this test; the TemporaryFolder rule removes it afterwards.
    File cacheDir = temporaryFolder.newFolder();
    fs = FileSystem.get(cacheDir.toURI(), new Configuration());
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(cacheDir.toURI().toString());
    // Create the per-query subdirectory the ivarator will read/write under the cache root.
    File queryDirFile = new File(cacheDir, "query");
    queryDirFile.deleteOnExit();
    Assert.assertTrue(queryDirFile.mkdirs());
    cacheDirs = Collections.singletonList(new IvaratorCacheDir(config, fs, queryDirFile.toURI().toString()));
}
Also used : IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) Configuration(org.apache.hadoop.conf.Configuration) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) File(java.io.File) Before(org.junit.Before)

Example 2 with IvaratorCacheDir

use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

From the class IteratorBuildingVisitor, the getIvaratorCacheDirs method:

/**
 * Build a list of potential hdfs directories based on each ivarator cache dir configs.
 *
 * @return A path
 */
/**
 * Build a list of potential hdfs cache directories, one per valid ivarator cache dir config.
 * Each directory is rooted at {@code basePathURI/queryId[/scanId]/subdirectory}, where the
 * subdirectory is unique per ivarator instance within this iterator.
 *
 * @return the list of usable ivarator cache dirs (never empty)
 * @throws IOException
 *             if no usable hdfs cache dir could be derived from the configured cache dir configs
 */
private List<IvaratorCacheDir> getIvaratorCacheDirs() throws IOException {
    List<IvaratorCacheDir> pathAndFs = new ArrayList<>();
    // first lets increment the count for a unique subdirectory
    // (string concatenation converts the int; no explicit Integer.toString needed)
    String subdirectory = ivaratorCacheSubDirPrefix + "term" + (++ivaratorCount);
    if (ivaratorCacheDirConfigs != null && !ivaratorCacheDirConfigs.isEmpty()) {
        for (IvaratorCacheDirConfig config : ivaratorCacheDirConfigs) {
            // first, make sure the cache configuration is valid; invalid configs are skipped
            if (config.isValid()) {
                Path path = new Path(config.getBasePathURI(), queryId);
                if (scanId == null) {
                    // without a scan id, concurrent scans of the same query share a directory
                    log.warn("Running query iterator for " + queryId + " without a scan id.  This could cause ivarator directory conflicts.");
                } else {
                    path = new Path(path, scanId);
                }
                path = new Path(path, subdirectory);
                URI uri = path.toUri();
                pathAndFs.add(new IvaratorCacheDir(config, hdfsFileSystem.getFileSystem(uri), uri.toString()));
            }
        }
    }
    if (pathAndFs.isEmpty()) {
        throw new IOException("Unable to find a usable hdfs cache dir out of " + ivaratorCacheDirConfigs);
    }
    return pathAndFs;
}
Also used : Path(org.apache.hadoop.fs.Path) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) URI(java.net.URI)

Example 3 with IvaratorCacheDir

use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

From the class DatawaveFieldIndexCachingIteratorJexl, the setupRowBasedHdfsBackedSet method:

/**
 * This will setup the set for the specified range. This will attempt to reuse precomputed and persisted sets if we are allowed to.
 *
 * @param row
 * @throws IOException
 */
/**
 * This will setup the set for the specified range. This will attempt to reuse precomputed and persisted sets if we are allowed to.
 *
 * @param row
 *            the row whose hdfs backed set is being set up
 * @throws IOException
 *             declared for interface compatibility; internally caught and rethrown as IllegalStateException (see catch below)
 */
protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
    // we are done if cancelled
    if (this.setControl.isCancelledQuery()) {
        return;
    }
    try {
        // for each of the ivarator cache dirs
        for (IvaratorCacheDir ivaratorCacheDir : ivaratorCacheDirs) {
            // get the row specific dir
            Path rowDir = getRowDir(new Path(ivaratorCacheDir.getPathURI()), row);
            FileSystem fs = ivaratorCacheDir.getFs();
            // if we are not allowing reuse of directories, then delete it
            if (!allowDirReuse && fs.exists(rowDir)) {
                fs.delete(rowDir, true);
            }
        }
        // ensure the control directory is created; track whether we created it so teardown
        // can distinguish a directory this instance made from a pre-existing (reused) one
        Path controlRowDir = getRowDir(this.controlDir, row);
        if (!this.controlFs.exists(controlRowDir)) {
            this.controlFs.mkdirs(controlRowDir);
            this.createdRowDir = true;
        } else {
            this.createdRowDir = false;
        }
        // the hdfs backed set will reload any previously persisted files found under the cache dirs
        this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions, new FileKeySortedSet.Factory());
        this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
        this.currentRow = row;
        this.setControl.takeOwnership(row, this);
        // if this set is not marked as complete (meaning completely filled AND persisted), then we cannot trust the contents and we need to recompute.
        if (!this.setControl.isCompleteAndPersisted(row)) {
            this.set.clear();
            this.keys = null;
        } else {
            // reuse the persisted set: iterate its keys instead of rescanning
            this.keys = new CachingIterator<>(this.set.iterator());
        }
        // reset the keyValues counter as we have a new set here
        scannedKeys.set(0);
    } catch (IOException ioe) {
        // NOTE(review): the declared IOException is never propagated; all IO failures are
        // translated to IllegalStateException here — confirm callers rely on this behavior.
        throw new IllegalStateException("Unable to create Hdfs backed sorted set", ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir)

Example 4 with IvaratorCacheDir

use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

From the class IvaratorReloadTest, the createRangeIvarator method:

public static DatawaveFieldIndexRangeIteratorJexl createRangeIvarator(FileSystem fs, Path uniqueDir) {
    // The unique directory serves as both the cache dir config base path and the resolved path URI.
    String uniqueUri = uniqueDir.toUri().toString();
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(uniqueUri);
    IvaratorCacheDir cacheDir = new IvaratorCacheDir(config, fs, uniqueUri);
    List<IvaratorCacheDir> cacheDirs = Collections.singletonList(cacheDir);
    // @formatter:off
    return DatawaveFieldIndexRangeIteratorJexl.builder()
            .withFieldName(new Text("POINT"))
            .withLowerBound("1f1bfaa80000000000")
            .lowerInclusive(true)
            .withUpperBound("1f240557ffffffffff")
            .upperInclusive(true)
            .withTimeFilter(null)
            .withDatatypeFilter(null)
            .negated(false)
            .withScanThreshold(1)
            .withScanTimeout(3600000)
            .withHdfsBackedSetBufferSize(10000)
            .withMaxRangeSplit(1)
            .withMaxOpenFiles(100)
            .withIvaratorCacheDirs(cacheDirs)
            .withQueryLock(null)
            .allowDirResuse(true)
            .withReturnKeyType(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME)
            .withSortedUUIDs(true)
            .withCompositeMetadata(null)
            .withCompositeSeekThreshold(10)
            .withTypeMetadata(null)
            .withSubRanges(null)
            .withIvaratorSourcePool(createIvaratorSourcePool(10))
            .build();
    // @formatter:on
}
Also used : IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) Text(org.apache.hadoop.io.Text) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir)

Example 5 with IvaratorCacheDir

use of datawave.query.iterator.ivarator.IvaratorCacheDir in project datawave by NationalSecurityAgency.

From the class HdfsBackedSortedSetTest, the persistReloadTest method:

@Test
public void persistReloadTest() throws Exception {
    File tempDir = temporaryFolder.newFolder();
    File smallDir = new File(tempDir, "small");
    Assert.assertTrue(smallDir.mkdirs());
    File largeDir = new File(tempDir, "large");
    Assert.assertTrue(largeDir.mkdirs());
    LocalFileSystem fs = new LocalFileSystem();
    fs.initialize(tempDir.toURI(), new Configuration());
    FsStatus fsStatus = fs.getStatus();
    // set the min remaining MB to something which will cause the 'small' directiory to be skipped
    long minRemainingMB = (fsStatus.getRemaining() / 0x100000L) + 4096l;
    List<IvaratorCacheDir> ivaratorCacheDirs = new ArrayList<>();
    ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(smallDir.toURI().toString(), 0, minRemainingMB), fs, smallDir.toURI().toString()));
    ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(largeDir.toURI().toString()), fs, largeDir.toURI().toString()));
    String uniquePath = "blah";
    HdfsBackedSortedSet<String> sortedSet = new HdfsBackedSortedSet<>(ivaratorCacheDirs, uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    // Add an entry to the sorted set
    String someTestString = "some test string";
    sortedSet.add(someTestString);
    // persist the sorted set
    sortedSet.persist();
    Path smallPath = new Path(smallDir.toURI().toString());
    Path smallSubPath = new Path(smallPath, uniquePath);
    Path largePath = new Path(largeDir.toURI().toString());
    Path largeSubPath = new Path(largePath, uniquePath);
    // ensure that data was written to the large folder, not the small folder
    Assert.assertFalse(fs.exists(smallSubPath));
    Assert.assertEquals(0, fs.listStatus(smallPath).length);
    Assert.assertTrue(fs.exists(largeSubPath));
    FileStatus[] fileStatuses = fs.listStatus(largeSubPath);
    Assert.assertEquals(1, fileStatuses.length);
    Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet"));
    // Now make sure reloading an ivarator cache dir works
    HdfsBackedSortedSet<String> reloadedSortedSet = new HdfsBackedSortedSet<>(ivaratorCacheDirs, uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    Assert.assertEquals(1, reloadedSortedSet.size());
    Assert.assertEquals(someTestString, reloadedSortedSet.first());
}
Also used : Path(org.apache.hadoop.fs.Path) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) FsStatus(org.apache.hadoop.fs.FsStatus) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) File(java.io.File) Test(org.junit.Test)

Aggregations

IvaratorCacheDir (datawave.query.iterator.ivarator.IvaratorCacheDir)6 IvaratorCacheDirConfig (datawave.query.iterator.ivarator.IvaratorCacheDirConfig)5 Path (org.apache.hadoop.fs.Path)4 File (java.io.File)3 ArrayList (java.util.ArrayList)3 Configuration (org.apache.hadoop.conf.Configuration)3 IOException (java.io.IOException)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 FsStatus (org.apache.hadoop.fs.FsStatus)2 LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem)2 Test (org.junit.Test)2 URI (java.net.URI)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Text (org.apache.hadoop.io.Text)1 Assert (org.junit.Assert)1 Before (org.junit.Before)1 Rule (org.junit.Rule)1