Search in sources :

Example 1 with IvaratorCacheDirConfig

use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.

The setup method of the class IvaratorInterruptTest.

/**
 * Prepares the query logic before each test: sets the default time zone to GMT, enables full
 * table scans, lowers the value expansion threshold to 1, and points the logic at the test hadoop
 * configuration and at a freshly created temporary ivarator cache directory.
 *
 * @throws IOException
 *             propagated from the calls below (e.g. creating the temporary folder)
 */
@Before
public void setup() throws IOException {
    TimeZone.setDefault(TimeZone.getTimeZone("GMT"));

    logic.setFullTableScanEnabled(true);
    // a threshold of 1 should force regex expansion into ivarators
    logic.setMaxValueExpansionThreshold(1);

    // the hadoop configuration is loaded as a classpath resource
    final URL hdfsSiteConfig = getClass().getResource("/testhadoop.config");
    logic.setHdfsSiteConfigURLs(hdfsSiteConfig.toExternalForm());

    // cache results go into a new temporary folder, handed over as a single cache dir config
    final String cacheDirUri = temporaryFolder.newFolder().toURI().toString();
    logic.setIvaratorCacheDirConfigs(Collections.singletonList(new IvaratorCacheDirConfig(cacheDirUri)));

    deserializer = new KryoDocumentDeserializer();
}
Also used : KryoDocumentDeserializer(datawave.query.function.deserializer.KryoDocumentDeserializer) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) File(java.io.File) URL(java.net.URL) Before(org.junit.Before)

Example 2 with IvaratorCacheDirConfig

use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.

The setup method of the class DatawaveFieldIndexIteratorJexlTest.

/**
 * Creates a temporary cache folder with a "query" subdirectory, opens a file system for the
 * folder, and stores a single-element list holding the resulting {@code IvaratorCacheDir} in
 * {@code cacheDirs}.
 *
 * @throws IOException
 *             propagated from the calls below (e.g. creating the temporary folder)
 */
@Before
public void setup() throws IOException {
    final File cacheRoot = temporaryFolder.newFolder();
    fs = FileSystem.get(cacheRoot.toURI(), new Configuration());

    // create the "query" subdirectory and fail the setup if it cannot be created
    final File queryDir = new File(cacheRoot, "query");
    queryDir.deleteOnExit();
    Assert.assertTrue(queryDir.mkdirs());

    final IvaratorCacheDirConfig cacheConfig = new IvaratorCacheDirConfig(cacheRoot.toURI().toString());
    final String queryDirUri = queryDir.toURI().toString();
    cacheDirs = Collections.singletonList(new IvaratorCacheDir(cacheConfig, fs, queryDirUri));
}
Also used : IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) Configuration(org.apache.hadoop.conf.Configuration) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) File(java.io.File) Before(org.junit.Before)

Example 3 with IvaratorCacheDirConfig

use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.

The setup method of the class QueryIteratorIT.

/**
 * Builds the fixtures used by each test: a new {@code QueryIterator}, the iterator option map
 * (pipeline flags, indexed / non-indexed field lists, query id, ivarator cache dir config, hdfs
 * site config and time range), the type metadata for the indexed fields, and mocks for the
 * iterator environment and the event data query filter.
 *
 * @throws IOException
 *             propagated from the calls below (e.g. creating the temporary folder)
 */
@Before
public void setup() throws IOException {
    iterator = new QueryIterator();
    options = new HashMap<>();
    tempPath = temporaryFolder.newFolder().toPath();

    // global options: force serial pipelines and allow field index / term frequency lookups
    options.put(SERIAL_EVALUATION_PIPELINE, "true");
    options.put(ALLOW_FIELD_INDEX_EVALUATION, "true");
    options.put(ALLOW_TERM_FREQUENCY_LOOKUP, "true");

    // indexed fields list
    options.put(INDEXED_FIELDS, "EVENT_FIELD1,EVENT_FIELD4,EVENT_FIELD6,TF_FIELD0,TF_FIELD1,TF_FIELD2,INDEX_ONLY_FIELD1,INDEX_ONLY_FIELD2,INDEX_ONLY_FIELD3");

    // unindexed fields list
    options.put(NON_INDEXED_DATATYPES, DEFAULT_DATATYPE + ":EVENT_FIELD2,EVENT_FIELD3,EVENT_FIELD5");

    // query id
    options.put(QUERY_ID, "000001");

    // ivarator settings: cache dir config is passed as json, hdfs config as an external-form url
    final String cacheDirUri = "file://" + tempPath.toAbsolutePath();
    options.put(IVARATOR_CACHE_DIR_CONFIG, IvaratorCacheDirConfig.toJson(new IvaratorCacheDirConfig(cacheDirUri)));
    final URL hadoopConfig = getClass().getResource("/testhadoop.config");
    options.put(HDFS_SITE_CONFIG_URLS, hadoopConfig.toExternalForm());

    // query time range
    options.put(START_TIME, "10");
    options.put(END_TIME, "100");

    // these will be marked as indexed fields; registration order matches the original listing
    final String lcNoDiacritics = "datawave.data.type.LcNoDiacriticsType";
    final String commaFree = CommaFreeType.class.getName();
    typeMetadata = new TypeMetadata();
    for (String field : new String[] {"EVENT_FIELD1", "EVENT_FIELD4", "EVENT_FIELD6", "TF_FIELD0"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }
    for (String field : new String[] {"TF_FIELD1", "TF_FIELD2"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, commaFree);
    }
    for (String field : new String[] {"INDEX_ONLY_FIELD1", "INDEX_ONLY_FIELD2", "INDEX_ONLY_FIELD3"}) {
        typeMetadata.put(field, DEFAULT_DATATYPE, lcNoDiacritics);
    }

    // mocked collaborators; the environment always answers getConfig() with the default configuration
    environment = createMock(IteratorEnvironment.class);
    EasyMock.expect(environment.getConfig()).andReturn(DefaultConfiguration.getInstance()).anyTimes();
    filter = createMock(EventDataQueryFilter.class);
}
Also used : IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) URL(java.net.URL) Before(org.junit.Before)

Example 4 with IvaratorCacheDirConfig

use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.

The getIvaratorCacheDirs method of the class IteratorBuildingVisitor.

/**
 * Builds one {@link IvaratorCacheDir} for every valid ivarator cache dir config. Each directory
 * is the config's base path extended with the query id, the scan id (when one is set) and a
 * subdirectory whose name is derived from an incrementing counter.
 *
 * @return the cache directories, one per valid config; never empty
 * @throws IOException
 *             if no config yields a usable cache directory (may also be propagated from the file
 *             system lookup)
 */
private List<IvaratorCacheDir> getIvaratorCacheDirs() throws IOException {
    // bump the counter up front so this call gets its own subdirectory name
    final String termSubDir = ivaratorCacheSubDirPrefix + "term" + (++ivaratorCount);
    final List<IvaratorCacheDir> cacheDirs = new ArrayList<>();

    if (ivaratorCacheDirConfigs != null) {
        for (IvaratorCacheDirConfig dirConfig : ivaratorCacheDirConfigs) {
            // only valid cache configurations contribute a directory
            if (!dirConfig.isValid()) {
                continue;
            }

            Path queryPath = new Path(dirConfig.getBasePathURI(), queryId);
            if (scanId != null) {
                queryPath = new Path(queryPath, scanId);
            } else {
                log.warn("Running query iterator for " + queryId + " without a scan id.  This could cause ivarator directory conflicts.");
            }

            final URI termUri = new Path(queryPath, termSubDir).toUri();
            cacheDirs.add(new IvaratorCacheDir(dirConfig, hdfsFileSystem.getFileSystem(termUri), termUri.toString()));
        }
    }

    if (cacheDirs.isEmpty()) {
        throw new IOException("Unable to find a usable hdfs cache dir out of " + ivaratorCacheDirConfigs);
    }
    return cacheDirs;
}
Also used : Path(org.apache.hadoop.fs.Path) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) URI(java.net.URI)

Example 5 with IvaratorCacheDirConfig

use of datawave.query.iterator.ivarator.IvaratorCacheDirConfig in project datawave by NationalSecurityAgency.

The ivaratorConfig method of the class AbstractFunctionalQuery.

/**
 * Configures the Ivarator cache. Each cache directory is created as a separate temporary folder.
 *
 * @param hdfsLocations
 *            number of HDFS locations to configure
 * @param fst
 *            when true, also create an FST ivarator cache directory for each location
 * @return a two-element array: the ivarator cache directory URIs followed by the FST cache
 *         directory paths (the latter is empty when {@code fst} is false)
 * @throws IOException
 *             error creating HDFS cache directory
 */
protected List<String>[] ivaratorConfig(final int hdfsLocations, final boolean fst) throws IOException {
    final URL siteConfig = this.getClass().getResource("/testhadoop.config");
    Assert.assertNotNull(siteConfig);
    this.logic.setHdfsSiteConfigURLs(siteConfig.toExternalForm());

    final List<String> cacheUris = new ArrayList<>();
    final List<String> fstPaths = new ArrayList<>();
    for (int created = 0; created < hdfsLocations; created++) {
        // regular cache dirs are recorded as URI strings
        cacheUris.add(Paths.get(temporaryFolder.newFolder().toURI()).toUri().toString());
        if (fst) {
            // fst cache dirs get their own folder and are recorded as absolute paths
            fstPaths.add(Paths.get(temporaryFolder.newFolder().toURI()).toAbsolutePath().toString());
        }
    }

    log.info("hdfs dirs(" + String.join(",", cacheUris) + ")");
    final List<IvaratorCacheDirConfig> cacheConfigs = new ArrayList<>();
    for (String cacheUri : cacheUris) {
        cacheConfigs.add(new IvaratorCacheDirConfig(cacheUri));
    }
    this.logic.setIvaratorCacheDirConfigs(cacheConfigs);

    if (fst) {
        final String fstList = String.join(",", fstPaths);
        log.info("fst dirs(" + fstList + ")");
        this.logic.setIvaratorFstHdfsBaseURIs(fstList);
    }

    return new List[] { cacheUris, fstPaths };
}
Also used : Path(java.nio.file.Path) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) ArrayList(java.util.ArrayList) List(java.util.List) URL(java.net.URL)

Aggregations

IvaratorCacheDirConfig (datawave.query.iterator.ivarator.IvaratorCacheDirConfig)18 URL (java.net.URL)7 File (java.io.File)6 InMemoryInstance (datawave.accumulo.inmemory.InMemoryInstance)5 IvaratorCacheDir (datawave.query.iterator.ivarator.IvaratorCacheDir)5 ArrayList (java.util.ArrayList)5 Configuration (org.apache.hadoop.conf.Configuration)5 Text (org.apache.hadoop.io.Text)5 BeforeClass (org.junit.BeforeClass)5 HashMultimap (com.google.common.collect.HashMultimap)4 Multimap (com.google.common.collect.Multimap)4 RawRecordContainerImpl (datawave.ingest.config.RawRecordContainerImpl)4 RawRecordContainer (datawave.ingest.data.RawRecordContainer)4 Type (datawave.ingest.data.Type)4 NormalizedContentInterface (datawave.ingest.data.config.NormalizedContentInterface)4 NormalizedFieldAndValue (datawave.ingest.data.config.NormalizedFieldAndValue)4 BulkIngestKey (datawave.ingest.mapreduce.job.BulkIngestKey)4 MockStatusReporter (datawave.query.testframework.MockStatusReporter)4 Connector (org.apache.accumulo.core.client.Connector)4 Value (org.apache.accumulo.core.data.Value)4