use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.
the class IvaratorInterruptTest method setup.
/**
 * Prepares the query logic so that regex terms are expanded into ivarators, and points it at a fresh temporary ivarator cache directory.
 *
 * @throws IOException
 *             if creating the temporary cache directory fails
 * @throws IllegalStateException
 *             if the {@code /testhadoop.config} resource cannot be found
 */
@Before
public void setup() throws IOException {
    TimeZone.setDefault(TimeZone.getTimeZone("GMT"));

    logic.setFullTableScanEnabled(true);

    // this should force regex expansion into ivarators
    logic.setMaxValueExpansionThreshold(1);

    // setup the hadoop configuration; getResource returns null when the resource is missing, so fail with a
    // descriptive message instead of a bare NullPointerException on the next line
    URL hadoopConfig = this.getClass().getResource("/testhadoop.config");
    if (hadoopConfig == null) {
        throw new IllegalStateException("Unable to find the /testhadoop.config resource");
    }
    logic.setHdfsSiteConfigURLs(hadoopConfig.toExternalForm());

    // setup a directory for cache results
    File tmpDir = temporaryFolder.newFolder();
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(tmpDir.toURI().toString());
    logic.setIvaratorCacheDirConfigs(Collections.singletonList(config));

    deserializer = new KryoDocumentDeserializer();
}
use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.
the class DatawaveFieldIndexIteratorJexlTest method setup.
/**
 * Creates a temporary base cache directory with a {@code query} subdirectory and exposes them as a single {@link IvaratorCacheDir} in {@code cacheDirs}.
 * The file system for the base directory is stored in {@code fs}.
 *
 * @throws IOException
 *             declared for the temporary folder creation and the file system lookup
 */
@Before
public void setup() throws IOException {
    final File baseDir = temporaryFolder.newFolder();
    final IvaratorCacheDirConfig dirConfig = new IvaratorCacheDirConfig(baseDir.toURI().toString());

    fs = FileSystem.get(baseDir.toURI(), new Configuration());

    // create the query specific subdirectory and verify that it was actually created
    final File querySubDir = new File(baseDir, "query");
    querySubDir.deleteOnExit();
    final boolean created = querySubDir.mkdirs();
    Assert.assertTrue(created);

    final IvaratorCacheDir cacheDir = new IvaratorCacheDir(dirConfig, fs, querySubDir.toURI().toString());
    cacheDirs = Collections.singletonList(cacheDir);
}
use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.
the class QueryIteratorIT method setup.
/**
 * Creates the iterator under test together with its option map, the type metadata and the mocked environment and filter.
 * <p>
 * The ivarator cache directory is a fresh temporary folder and the hdfs site configuration is read from the {@code /testhadoop.config} resource.
 *
 * @throws IOException
 *             if creating the temporary folder fails
 * @throws IllegalStateException
 *             if the {@code /testhadoop.config} resource cannot be found
 */
@Before
public void setup() throws IOException {
    iterator = new QueryIterator();
    options = new HashMap<>();
    tempPath = temporaryFolder.newFolder().toPath();

    // global options

    // force serial pipelines
    options.put(SERIAL_EVALUATION_PIPELINE, "true");
    options.put(ALLOW_FIELD_INDEX_EVALUATION, "true");
    options.put(ALLOW_TERM_FREQUENCY_LOOKUP, "true");

    // set the indexed fields list
    options.put(INDEXED_FIELDS, "EVENT_FIELD1,EVENT_FIELD4,EVENT_FIELD6,TF_FIELD0,TF_FIELD1,TF_FIELD2,INDEX_ONLY_FIELD1,INDEX_ONLY_FIELD2,INDEX_ONLY_FIELD3");

    // set the unindexed fields list
    options.put(NON_INDEXED_DATATYPES, DEFAULT_DATATYPE + ":EVENT_FIELD2,EVENT_FIELD3,EVENT_FIELD5");

    // set a query id
    options.put(QUERY_ID, "000001");

    // setup ivarator settings; let Path.toUri() build the file URI rather than concatenating "file://" with the
    // raw path, which yields a malformed URI for Windows paths and for paths that need escaping (e.g. spaces)
    IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(tempPath.toUri().toString());
    options.put(IVARATOR_CACHE_DIR_CONFIG, IvaratorCacheDirConfig.toJson(config));

    // getResource returns null when the resource is missing, so fail with a descriptive message instead of a
    // bare NullPointerException
    URL hdfsSiteConfig = this.getClass().getResource("/testhadoop.config");
    if (hdfsSiteConfig == null) {
        throw new IllegalStateException("Unable to find the /testhadoop.config resource");
    }
    options.put(HDFS_SITE_CONFIG_URLS, hdfsSiteConfig.toExternalForm());

    // query time range
    options.put(START_TIME, "10");
    options.put(END_TIME, "100");

    // these will be marked as indexed fields
    typeMetadata = new TypeMetadata();
    typeMetadata.put("EVENT_FIELD1", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("EVENT_FIELD4", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("EVENT_FIELD6", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("TF_FIELD0", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("TF_FIELD1", DEFAULT_DATATYPE, CommaFreeType.class.getName());
    typeMetadata.put("TF_FIELD2", DEFAULT_DATATYPE, CommaFreeType.class.getName());
    typeMetadata.put("INDEX_ONLY_FIELD1", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("INDEX_ONLY_FIELD2", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");
    typeMetadata.put("INDEX_ONLY_FIELD3", DEFAULT_DATATYPE, "datawave.data.type.LcNoDiacriticsType");

    // the mocked environment hands out the default configuration any number of times
    environment = createMock(IteratorEnvironment.class);
    EasyMock.expect(environment.getConfig()).andReturn(DefaultConfiguration.getInstance()).anyTimes();

    filter = createMock(EventDataQueryFilter.class);
}
use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method getIvaratorCacheDirs.
/**
 * Builds one candidate ivarator cache directory for every valid ivarator cache dir config.
 * <p>
 * Each call increments the ivarator count, so the term subdirectory name differs between calls. The resulting path is the config's base path URI, followed
 * by the query id, the scan id (when one is set) and finally the term subdirectory.
 *
 * @return the cache directories for the valid configs; never empty
 * @throws IOException
 *             if none of the configs produced a cache directory
 */
private List<IvaratorCacheDir> getIvaratorCacheDirs() throws IOException {
    // bump the count first so that this call gets its own subdirectory
    final String termSubDir = ivaratorCacheSubDirPrefix + "term" + (++ivaratorCount);

    final List<IvaratorCacheDir> cacheDirs = new ArrayList<>();
    if (ivaratorCacheDirConfigs != null) {
        for (IvaratorCacheDirConfig dirConfig : ivaratorCacheDirConfigs) {
            // skip any cache configuration that is not valid
            if (!config(dirConfig)) {
                continue;
            }

            Path cachePath = new Path(dirConfig.getBasePathURI(), queryId);
            if (scanId != null) {
                cachePath = new Path(cachePath, scanId);
            } else {
                log.warn("Running query iterator for " + queryId + " without a scan id. This could cause ivarator directory conflicts.");
            }

            final URI cacheUri = new Path(cachePath, termSubDir).toUri();
            cacheDirs.add(new IvaratorCacheDir(dirConfig, hdfsFileSystem.getFileSystem(cacheUri), cacheUri.toString()));
        }
    }

    if (cacheDirs.isEmpty()) {
        throw new IOException("Unable to find a usable hdfs cache dir out of " + ivaratorCacheDirConfigs);
    }
    return cacheDirs;
}

/** Returns whether the given cache dir config reports itself as valid. */
private static boolean config(IvaratorCacheDirConfig dirConfig) {
    return dirConfig.isValid();
}
use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.
the class AbstractFunctionalQuery method ivaratorConfig.
/**
 * Configures the Ivarator cache on the query logic. Each cache directory is created as a separate temporary folder.
 *
 * @param hdfsLocations
 *            number of HDFS locations to configure
 * @param fst
 *            when true, also create an FST ivarator cache directory for each location
 * @return a two element array: index 0 holds the cache directory URIs, index 1 holds the absolute paths of the FST directories (empty when {@code fst} is
 *         false)
 * @throws IOException
 *             error creating HDFS cache directory
 */
protected List<String>[] ivaratorConfig(final int hdfsLocations, final boolean fst) throws IOException {
    final URL hadoopConfigUrl = this.getClass().getResource("/testhadoop.config");
    Assert.assertNotNull(hadoopConfigUrl);
    this.logic.setHdfsSiteConfigURLs(hadoopConfigUrl.toExternalForm());

    final List<String> cacheUris = new ArrayList<>();
    final List<String> fstPaths = new ArrayList<>();
    for (int created = 0; created < hdfsLocations; created++) {
        // the regular cache directory is recorded as a URI
        cacheUris.add(Paths.get(temporaryFolder.newFolder().toURI()).toUri().toString());
        if (fst) {
            // the FST directory is recorded as an absolute path
            fstPaths.add(Paths.get(temporaryFolder.newFolder().toURI()).toAbsolutePath().toString());
        }
    }

    log.info("hdfs dirs(" + String.join(",", cacheUris) + ")");
    final List<IvaratorCacheDirConfig> cacheConfigs = new ArrayList<>();
    for (String cacheUri : cacheUris) {
        cacheConfigs.add(new IvaratorCacheDirConfig(cacheUri));
    }
    this.logic.setIvaratorCacheDirConfigs(cacheConfigs);

    if (fst) {
        final String fstList = String.join(",", fstPaths);
        log.info("fst dirs(" + fstList + ")");
        this.logic.setIvaratorFstHdfsBaseURIs(fstList);
    }

    return new List[] { cacheUris, fstPaths };
}
Aggregations