Example usage of datawave.query.iterator.ivarator.IvaratorCacheDir in the project datawave by NationalSecurityAgency:
the setup method of the class DatawaveFieldIndexIteratorJexlTest.
@Before
public void setup() throws IOException {
    // Create a temporary cache directory and a cache-dir config pointing at it.
    File cacheDir = temporaryFolder.newFolder();
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(cacheDir.toURI().toString());

    // Resolve the (local) file system backing the cache directory.
    fs = FileSystem.get(cacheDir.toURI(), new Configuration());

    // Create a "query" subdirectory, cleaned up on JVM exit.
    File queryDirFile = new File(cacheDir, "query");
    queryDirFile.deleteOnExit();
    Assert.assertTrue(queryDirFile.mkdirs());

    // The tests use a single ivarator cache dir rooted at the query directory.
    cacheDirs = Collections.singletonList(new IvaratorCacheDir(config, fs, queryDirFile.toURI().toString()));
}
Example usage of datawave.query.iterator.ivarator.IvaratorCacheDir in the project datawave by NationalSecurityAgency:
the getIvaratorCacheDirs method of the class IteratorBuildingVisitor.
/**
 * Build a list of potential hdfs cache directories, one per valid ivarator cache dir config.
 * Each directory path is made unique by appending the query id, the scan id (when available),
 * and a per-ivarator "term" subdirectory derived from an incrementing counter.
 *
 * @return the list of usable ivarator cache dirs, never empty
 * @throws IOException if no usable hdfs cache dir could be resolved from the configs
 */
private List<IvaratorCacheDir> getIvaratorCacheDirs() throws IOException {
    List<IvaratorCacheDir> pathAndFs = new ArrayList<>();
    // first lets increment the count for a unique subdirectory
    String subdirectory = ivaratorCacheSubDirPrefix + "term" + (++ivaratorCount);
    if (ivaratorCacheDirConfigs != null && !ivaratorCacheDirConfigs.isEmpty()) {
        for (IvaratorCacheDirConfig config : ivaratorCacheDirConfigs) {
            // first, make sure the cache configuration is valid; invalid configs are skipped
            if (config.isValid()) {
                Path path = new Path(config.getBasePathURI(), queryId);
                // without a scan id, concurrent scans of the same query share a directory
                if (scanId == null) {
                    log.warn("Running query iterator for " + queryId + " without a scan id. This could cause ivarator directory conflicts.");
                } else {
                    path = new Path(path, scanId);
                }
                path = new Path(path, subdirectory);
                URI uri = path.toUri();
                pathAndFs.add(new IvaratorCacheDir(config, hdfsFileSystem.getFileSystem(uri), uri.toString()));
            }
        }
    }
    if (pathAndFs.isEmpty())
        throw new IOException("Unable to find a usable hdfs cache dir out of " + ivaratorCacheDirConfigs);
    return pathAndFs;
}
Example usage of datawave.query.iterator.ivarator.IvaratorCacheDir in the project datawave by NationalSecurityAgency:
the setupRowBasedHdfsBackedSet method of the class DatawaveFieldIndexCachingIteratorJexl.
/**
 * This will setup the set for the specified range. This will attempt to reuse precomputed and persisted sets if we are allowed to.
 *
 * @param row
 *            the row for which to set up the hdfs-backed sorted set
 * @throws IOException
 *             if the underlying hdfs-backed set cannot be created
 */
protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
    // we are done if cancelled
    if (this.setControl.isCancelledQuery()) {
        return;
    }
    try {
        // for each of the ivarator cache dirs
        for (IvaratorCacheDir ivaratorCacheDir : ivaratorCacheDirs) {
            // get the row specific dir
            Path rowDir = getRowDir(new Path(ivaratorCacheDir.getPathURI()), row);
            FileSystem fs = ivaratorCacheDir.getFs();
            // if we are not allowing reuse of directories, then delete it
            if (!allowDirReuse && fs.exists(rowDir)) {
                fs.delete(rowDir, true);
            }
        }
        // ensure the control directory is created, remembering whether we created it
        // so that ownership/cleanup logic can distinguish new vs. pre-existing dirs
        Path controlRowDir = getRowDir(this.controlDir, row);
        if (!this.controlFs.exists(controlRowDir)) {
            this.controlFs.mkdirs(controlRowDir);
            this.createdRowDir = true;
        } else {
            this.createdRowDir = false;
        }
        // create the hdfs-backed set over all cache dirs; wrap it for thread-safe access
        this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions, new FileKeySortedSet.Factory());
        this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
        this.currentRow = row;
        this.setControl.takeOwnership(row, this);
        // if this set is not marked as complete (meaning completely filled AND persisted), then we cannot trust the contents and we need to recompute.
        if (!this.setControl.isCompleteAndPersisted(row)) {
            this.set.clear();
            this.keys = null;
        } else {
            // the persisted set is trustworthy; iterate its existing contents
            this.keys = new CachingIterator<>(this.set.iterator());
        }
        // reset the keyValues counter as we have a new set here
        scannedKeys.set(0);
    } catch (IOException ioe) {
        // surface filesystem failures as an illegal state rather than propagating IOException
        throw new IllegalStateException("Unable to create Hdfs backed sorted set", ioe);
    }
}
Example usage of datawave.query.iterator.ivarator.IvaratorCacheDir in the project datawave by NationalSecurityAgency:
the createRangeIvarator method of the class IvaratorReloadTest.
/**
 * Creates a range ivarator over the POINT field, bounded by a fixed hex range and
 * backed by a single cache dir rooted at the supplied unique directory.
 */
public static DatawaveFieldIndexRangeIteratorJexl createRangeIvarator(FileSystem fs, Path uniqueDir) {
    String cacheUri = uniqueDir.toUri().toString();
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(cacheUri);
    List<IvaratorCacheDir> cacheDirs = Collections.singletonList(new IvaratorCacheDir(config, fs, cacheUri));
    // @formatter:off
    return DatawaveFieldIndexRangeIteratorJexl.builder()
            .withFieldName(new Text("POINT"))
            .withLowerBound("1f1bfaa80000000000")
            .lowerInclusive(true)
            .withUpperBound("1f240557ffffffffff")
            .upperInclusive(true)
            .withTimeFilter(null)
            .withDatatypeFilter(null)
            .negated(false)
            .withScanThreshold(1)
            .withScanTimeout(3600000)
            .withHdfsBackedSetBufferSize(10000)
            .withMaxRangeSplit(1)
            .withMaxOpenFiles(100)
            .withIvaratorCacheDirs(cacheDirs)
            .withQueryLock(null)
            .allowDirResuse(true)
            .withReturnKeyType(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME)
            .withSortedUUIDs(true)
            .withCompositeMetadata(null)
            .withCompositeSeekThreshold(10)
            .withTypeMetadata(null)
            .withSubRanges(null)
            .withIvaratorSourcePool(createIvaratorSourcePool(10))
            .build();
    // @formatter:on
}
Example usage of datawave.query.iterator.ivarator.IvaratorCacheDir in the project datawave by NationalSecurityAgency:
the persistReloadTest method of the class HdfsBackedSortedSetTest.
@Test
public void persistReloadTest() throws Exception {
    // Two candidate cache dirs: "small" (configured to be skipped) and "large".
    File tempDir = temporaryFolder.newFolder();
    File smallDir = new File(tempDir, "small");
    Assert.assertTrue(smallDir.mkdirs());
    File largeDir = new File(tempDir, "large");
    Assert.assertTrue(largeDir.mkdirs());
    LocalFileSystem fs = new LocalFileSystem();
    fs.initialize(tempDir.toURI(), new Configuration());
    FsStatus fsStatus = fs.getStatus();
    // set the min remaining MB to something which will cause the 'small' directory to be skipped
    // (uppercase L suffix: a lowercase 'l' is easily misread as the digit 1)
    long minRemainingMB = (fsStatus.getRemaining() / 0x100000L) + 4096L;
    List<IvaratorCacheDir> ivaratorCacheDirs = new ArrayList<>();
    ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(smallDir.toURI().toString(), 0, minRemainingMB), fs, smallDir.toURI().toString()));
    ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(largeDir.toURI().toString()), fs, largeDir.toURI().toString()));
    String uniquePath = "blah";
    HdfsBackedSortedSet<String> sortedSet = new HdfsBackedSortedSet<>(ivaratorCacheDirs, uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    // Add an entry to the sorted set
    String someTestString = "some test string";
    sortedSet.add(someTestString);
    // persist the sorted set
    sortedSet.persist();
    Path smallPath = new Path(smallDir.toURI().toString());
    Path smallSubPath = new Path(smallPath, uniquePath);
    Path largePath = new Path(largeDir.toURI().toString());
    Path largeSubPath = new Path(largePath, uniquePath);
    // ensure that data was written to the large folder, not the small folder
    Assert.assertFalse(fs.exists(smallSubPath));
    Assert.assertEquals(0, fs.listStatus(smallPath).length);
    Assert.assertTrue(fs.exists(largeSubPath));
    FileStatus[] fileStatuses = fs.listStatus(largeSubPath);
    Assert.assertEquals(1, fileStatuses.length);
    Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet"));
    // Now make sure reloading an ivarator cache dir works: a new set over the same
    // dirs and unique path should pick up the persisted entry
    HdfsBackedSortedSet<String> reloadedSortedSet = new HdfsBackedSortedSet<>(ivaratorCacheDirs, uniquePath, 9999, 2, new FileSortedSet.PersistOptions());
    Assert.assertEquals(1, reloadedSortedSet.size());
    Assert.assertEquals(someTestString, reloadedSortedSet.first());
}
Aggregations