Use of datawave.query.config.ShardIndexQueryConfiguration in the project "datawave" by NationalSecurityAgency.
The following is the setupQuery method of the ShardIndexQueryTable class.
/**
 * Builds the scanners and result iterator for a shard-index discovery query.
 * <p>
 * Expects the {@code genericConfig} produced by {@code initialize(...)}: one
 * {@link BatchScanner} is created per normalized literal term (forward index) and one
 * per normalized pattern (forward or reverse index, as decided when the pattern's
 * range was computed). The scanners' results are decoded into
 * {@link DiscoveredThing} instances and exposed through {@code this.iterator};
 * {@code this.scanner} is set to a no-op {@link ScannerBase} stub because the real
 * scanning happens through the batch scanners wrapped by the iterator.
 *
 * @param genericConfig must be a {@link ShardIndexQueryConfiguration}
 * @throws QueryException if the configuration is not a ShardIndexQueryConfiguration
 */
@Override
public void setupQuery(GenericQueryConfiguration genericConfig) throws QueryException, TableNotFoundException, IOException, ExecutionException {
    if (!genericConfig.getClass().getName().equals(ShardIndexQueryConfiguration.class.getName())) {
        throw new QueryException("Did not receive a ShardIndexQueryConfiguration instance!!");
    }
    ShardIndexQueryConfiguration config = (ShardIndexQueryConfiguration) genericConfig;
    // Each entry pairs a scanner with a flag indicating whether it reads the reverse index.
    final List<Entry<BatchScanner,Boolean>> batchscanners = Lists.newLinkedList();
    // Literal terms always scan the forward shard index.
    for (Entry<String,String> termEntry : config.getNormalizedTerms().entries()) {
        // scan the table
        // NOTE(review): the reverse-index flag passed here compares the configured table
        // name against the reverse index table name — presumably false for literal
        // terms in normal configurations; confirm against the configuration defaults.
        BatchScanner bs = configureBatchScannerForDiscovery(config, this.scannerFactory, TableName.SHARD_INDEX,
                        Collections.singleton(config.getRangesForTerms().get(termEntry)),
                        Collections.singleton(termEntry.getValue()), Collections.emptySet(),
                        config.getTableName().equals(config.getReverseIndexTableName()), false,
                        Collections.singleton(termEntry.getKey()));
        batchscanners.add(Maps.immutableEntry(bs, false));
    }
    // Patterns scan either the forward or reverse index; the choice was recorded with
    // the range when initialize(...) computed it (rangeEntry.getValue()).
    for (Entry<String,String> patternEntry : config.getNormalizedPatterns().entries()) {
        Entry<Range,Boolean> rangeEntry = config.getRangesForPatterns().get(patternEntry);
        String tName = rangeEntry.getValue() ? TableName.SHARD_RINDEX : TableName.SHARD_INDEX;
        // scan the table
        BatchScanner bs = configureBatchScannerForDiscovery(config, this.scannerFactory, tName,
                        Collections.singleton(rangeEntry.getKey()), Collections.emptySet(),
                        Collections.singleton(patternEntry.getValue()), rangeEntry.getValue(), false,
                        Collections.singleton(patternEntry.getKey()));
        batchscanners.add(Maps.immutableEntry(bs, rangeEntry.getValue()));
    }
    final Iterator<Entry<BatchScanner,Boolean>> batchScannerIterator = batchscanners.iterator();
    // Flatten every scanned key/value entry into the DiscoveredThings serialized in its value.
    this.iterator = concat(transform(new CloseableIterator(batchScannerIterator), new Function<Entry<Key,Value>,Iterator<DiscoveredThing>>() {
        // Reused buffer for deserializing each Value; safe only because this function
        // is applied from a single consuming thread.
        DataInputBuffer in = new DataInputBuffer();

        @Override
        public Iterator<DiscoveredThing> apply(Entry<Key,Value> from) {
            Value value = from.getValue();
            in.reset(value.get(), value.getSize());
            ArrayWritable aw = new ArrayWritable(DiscoveredThing.class);
            try {
                aw.readFields(in);
            } catch (IOException e) {
                // Bug fix: the previous code returned null here, which would make the
                // surrounding Iterators.concat(...) throw a NullPointerException when it
                // reached this element, and silently hid the decode failure. Log the
                // failure and skip this entry instead.
                log.error("Failed to deserialize DiscoveredThing array from index value", e);
                return Collections.emptyIterator();
            }
            ArrayList<DiscoveredThing> thangs = Lists.newArrayListWithCapacity(aw.get().length);
            for (Writable w : aw.get()) {
                thangs.add((DiscoveredThing) w);
            }
            return thangs.iterator();
        }
    }));
    // The framework expects a ScannerBase, but all real work flows through this.iterator
    // (the batch scanners above), so install an inert stub.
    this.scanner = new ScannerBase() {

        @Override
        public void addScanIterator(IteratorSetting cfg) {}

        @Override
        public void clearColumns() {}

        @Override
        public void clearScanIterators() {}

        @Override
        public void close() {}

        @Override
        public Authorizations getAuthorizations() {
            return null;
        }

        @Override
        public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {}

        @Override
        public SamplerConfiguration getSamplerConfiguration() {
            return null;
        }

        @Override
        public void clearSamplerConfiguration() {}

        @Override
        public void setBatchTimeout(long l, TimeUnit timeUnit) {}

        @Override
        public long getBatchTimeout(TimeUnit timeUnit) {
            return 0;
        }

        @Override
        public void setClassLoaderContext(String s) {}

        @Override
        public void clearClassLoaderContext() {}

        @Override
        public String getClassLoaderContext() {
            return null;
        }

        @Override
        public void fetchColumn(Text colFam, Text colQual) {}

        @Override
        public void fetchColumn(IteratorSetting.Column column) {}

        @Override
        public void fetchColumnFamily(Text col) {}

        @Override
        public long getTimeout(TimeUnit timeUnit) {
            return 0;
        }

        @Override
        public Iterator<Entry<Key,Value>> iterator() {
            return null;
        }

        @Override
        public void removeScanIterator(String iteratorName) {}

        @Override
        public void setTimeout(long timeOut, TimeUnit timeUnit) {}

        @Override
        public void updateScanIteratorOption(String iteratorName, String key, String value) {}
    };
}
Use of datawave.query.config.ShardIndexQueryConfiguration in the project "datawave" by NationalSecurityAgency.
The following is the initialize method of the ShardIndexQueryTable class.
/**
 * Plans a shard-index discovery query: parses the (Lucene-syntax) query into JEXL,
 * expands unfielded and multi-normalized terms, applies the query model, and records
 * the resulting literal terms, regex patterns, and their index ranges on a new
 * {@link ShardIndexQueryConfiguration} for later use by setupQuery(...).
 *
 * The expansion steps below are order-dependent (unfielded expansion, then model
 * application, then normalizer expansion) — do not reorder them.
 *
 * @param connection Accumulo connector to query with
 * @param settings the user's query (Lucene syntax), dates, and parameters
 * @param auths authorizations to scan with
 * @return the populated configuration
 * @throws IllegalArgumentException if the query string is null or empty
 */
@Override
public GenericQueryConfiguration initialize(Connector connection, Query settings, Set<Authorizations> auths) throws Exception {
ShardIndexQueryConfiguration config = new ShardIndexQueryConfiguration(this, settings);
this.scannerFactory = new ScannerFactory(connection);
MetadataHelper metadataHelper = initializeMetadataHelper(connection, config.getMetadataTableName(), auths);
if (StringUtils.isEmpty(settings.getQuery())) {
throw new IllegalArgumentException("Query cannot be null");
}
if (log.isDebugEnabled()) {
log.debug("Query parameters set to " + settings.getParameters());
}
// Optional per-query overrides of the query model name/table; otherwise the
// instance-level defaults remain in effect.
String tModelName = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_NAME);
if (tModelName != null) {
modelName = tModelName;
}
String tModelTableName = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_TABLE_NAME);
if (tModelTableName != null) {
modelTableName = tModelTableName;
}
queryModel = metadataHelper.getQueryModel(modelTableName, modelName, null);
// Optional datatype filter restricting which data types are considered.
String datatypeFilterString = getTrimmedOrNull(settings, QueryParameters.DATATYPE_FILTER_SET);
if (datatypeFilterString != null) {
config.setDatatypeFilter(new HashSet<>(Arrays.asList(datatypeFilterString.split(PARAM_VALUE_SEP_STR))));
if (log.isDebugEnabled()) {
log.debug("Data type filter set to " + config.getDatatypeFilterAsString());
}
}
config.setConnector(connection);
config.setAuthorizations(auths);
if (indexTableName != null) {
config.setIndexTableName(indexTableName);
}
if (reverseIndexTableName != null) {
config.setReverseIndexTableName(reverseIndexTableName);
}
// Missing dates default to the widest possible range (epoch .. Long.MAX_VALUE).
if (settings.getBeginDate() != null) {
config.setBeginDate(settings.getBeginDate());
} else {
config.setBeginDate(new Date(0));
if (log.isDebugEnabled()) {
log.debug("No begin date supplied in settings.");
}
}
if (settings.getEndDate() != null) {
config.setEndDate(settings.getEndDate());
} else {
config.setEndDate(new Date(Long.MAX_VALUE));
if (log.isDebugEnabled()) {
log.debug("No end date supplied in settings.");
}
}
// start with a trimmed version of the query, converted to JEXL
LuceneToJexlQueryParser parser = new LuceneToJexlQueryParser();
parser.setAllowLeadingWildCard(this.isAllowLeadingWildcard());
QueryNode node = parser.parse(settings.getQuery().trim());
// TODO: Validate that this is a simple list of terms type of query
config.setQueryString(node.getOriginalQuery());
if (log.isDebugEnabled()) {
log.debug("Original Query = " + settings.getQuery().trim());
log.debug("JEXL Query = " + node.getOriginalQuery());
}
// Parse & flatten the query.
ASTJexlScript origScript = JexlASTHelper.parseAndFlattenJexlQuery(config.getQueryString());
ASTJexlScript script;
try {
script = UnfieldedIndexExpansionVisitor.expandUnfielded(config, this.scannerFactory, metadataHelper, origScript);
} catch (EmptyUnfieldedTermExpansionException e) {
// No unfielded term matched anything in the index: record empty term/pattern
// maps so setupQuery(...) creates no scanners and the query yields no results.
Multimap<String,String> emptyMap = Multimaps.unmodifiableMultimap(HashMultimap.create());
config.setNormalizedTerms(emptyMap);
config.setNormalizedPatterns(emptyMap);
return config;
}
Set<String> dataTypes = config.getDatatypeFilter();
Set<String> allFields = metadataHelper.getAllFields(dataTypes);
// Translate model (display) field names to their backing index field names.
script = QueryModelVisitor.applyModel(script, queryModel, allFields);
if (log.isTraceEnabled()) {
log.trace("fetching dataTypes from FetchDataTypesVisitor");
}
// Record per-field normalizer/type information needed for term normalization.
Multimap<String,Type<?>> fieldToDataTypeMap = FetchDataTypesVisitor.fetchDataTypes(metadataHelper, config.getDatatypeFilter(), script);
config.setDataTypes(fieldToDataTypeMap);
config.setQueryFieldsDatatypes(fieldToDataTypeMap);
final Set<String> indexedFields = metadataHelper.getIndexedFields(dataTypes);
config.setIndexedFields(indexedFields);
final Set<String> reverseIndexedFields = metadataHelper.getReverseIndexedFields(dataTypes);
config.setReverseIndexedFields(reverseIndexedFields);
final Multimap<String,Type<?>> normalizedFields = metadataHelper.getFieldsToDatatypes(dataTypes);
config.setNormalizedFieldsDatatypes(normalizedFields);
if (log.isTraceEnabled()) {
log.trace("Normalizers:");
for (String field : fieldToDataTypeMap.keySet()) {
log.trace(field + ": " + fieldToDataTypeMap.get(field));
}
}
// Expand each term into one node per applicable normalizer.
script = ExpandMultiNormalizedTerms.expandTerms(config, metadataHelper, script);
// Split the final script into exact literals and regex patterns (field -> value).
Multimap<String,String> literals = LiteralNodeVisitor.getLiterals(script);
Multimap<String,String> patterns = PatternNodeVisitor.getPatterns(script);
Map<Entry<String,String>,Range> rangesForTerms = Maps.newHashMap();
Map<Entry<String,String>,Entry<Range,Boolean>> rangesForPatterns = Maps.newHashMap();
config.setNormalizedTerms(literals);
config.setNormalizedPatterns(patterns);
if (log.isDebugEnabled()) {
log.debug("Normalized Literals = " + literals);
log.debug("Normalized Patterns = " + patterns);
}
// Precompute the index scan range for each literal term.
for (Entry<String,String> entry : literals.entries()) {
rangesForTerms.put(entry, ShardIndexQueryTableStaticMethods.getLiteralRange(entry));
}
// Precompute each pattern's scan range, plus whether it targets the reverse index
// (e.g. leading-wildcard patterns); setupQuery(...) uses the flag to pick the table.
for (Entry<String,String> entry : patterns.entries()) {
ShardIndexQueryTableStaticMethods.RefactoredRangeDescription r = ShardIndexQueryTableStaticMethods.getRegexRange(entry, isFullTableScanEnabled(), metadataHelper, config);
rangesForPatterns.put(entry, Maps.immutableEntry(r.range, r.isForReverseIndex));
}
config.setRangesForTerms(rangesForTerms);
config.setRangesForPatterns(rangesForPatterns);
return config;
}
Aggregations