Use of org.exist.storage.NodePath in project exist by eXist-db.
Class DOMIndexer, method store().
/**
 * Store the nodes.
 */
public void store() {
    // Create a wrapper element as root node
    final ElementImpl elem = new ElementImpl(ROOT_QNAME, broker.getBrokerPool().getSymbols());
    elem.setNodeId(broker.getBrokerPool().getNodeFactory().createInstance());
    elem.setOwnerDocument(targetDoc);
    elem.setChildCount(doc.getChildCount());
    elem.addNamespaceMapping(Namespaces.EXIST_NS_PREFIX, Namespaces.EXIST_NS);
    final NodePath path = new NodePath();
    path.addComponent(ROOT_QNAME);
    stack.push(elem);
    broker.storeNode(transaction, elem, path, indexSpec);
    targetDoc.appendChild((NodeHandle) elem);
    elem.setChildCount(0);
    // store the document nodes
    int top = (doc.size > 1) ? 1 : -1;
    while (top > 0) {
        store(top, path);
        top = doc.getNextSiblingFor(top);
    }
    // Close the wrapper element
    stack.pop();
    broker.endElement(elem, path, null);
    path.removeLastComponent();
}
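Note how the NodePath is kept in step with the element nesting: a component is added before broker.storeNode() and removed again after broker.endElement(). Below is a minimal, self-contained sketch of just that NodePath bookkeeping; the QName values are made up for illustration, and the two-argument org.exist.dom.QName constructor is an assumption.

import org.exist.dom.QName;
import org.exist.storage.NodePath;

public class NodePathDemo {
    public static void main(String[] args) {
        // Grow the path as elements are entered, mirroring the nesting in store().
        final NodePath path = new NodePath();
        path.addComponent(new QName("root", "http://example.org/ns"));
        path.addComponent(new QName("entry", "http://example.org/ns"));
        System.out.println(path.length());   // 2
        // Leaving an element restores the parent path, as after endElement() above.
        path.removeLastComponent();
        System.out.println(path.length());   // 1
    }
}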
Use of org.exist.storage.NodePath in project exist by eXist-db.
Class IndexUtils, method scanNode().
public static void scanNode(DBBroker broker, Txn transaction, IStoredNode node, StreamListener listener) {
    try (final INodeIterator iterator = broker.getNodeIterator(node)) {
        iterator.next();
        final NodePath path = node.getPath();
        scanNode(transaction, iterator, node, listener, path);
    } catch (final IOException ioe) {
        LOG.warn("Unable to close iterator", ioe);
    }
}
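Here the NodePath comes directly from the stored node via node.getPath(). A hypothetical sketch of comparing such a path against a configured path, using only NodePath methods that appear in these snippets; the element name and the matching policy are illustrative assumptions, not eXist's actual index-selection logic.

// Hypothetical: only rescan nodes that fall under a configured path.
final NodePath configured = new NodePath();
configured.addComponent(new QName("article", "http://example.org/ns"));
final NodePath nodePath = node.getPath();   // absolute path of the stored node
if (configured.match(nodePath)) {
    IndexUtils.scanNode(broker, transaction, node, listener);
}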
Use of org.exist.storage.NodePath in project exist by eXist-db.
Class LuceneMatchListener, method scanMatches().
private void scanMatches(final NodeProxy p) {
    // Collect the text content of all descendants of p.
    // Remember the start offsets of the text nodes for later use.
    final NodePath path = getPath(p);
    final LuceneIndexConfig idxConf = config.getConfig(path).next();
    final TextExtractor extractor = new DefaultTextExtractor();
    extractor.configure(config, idxConf);
    final OffsetList offsets = new OffsetList();
    int level = 0;
    int textOffset = 0;
    try {
        final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(p, false);
        while (reader.hasNext()) {
            final int ev = reader.next();
            switch (ev) {
                case XMLStreamConstants.END_ELEMENT:
                    if (--level < 0) {
                        break;
                    }
                    // call extractor.endElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.endElement(reader.getQName());
                    }
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    // call extractor.startElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.startElement(reader.getQName());
                    }
                    ++level;
                    break;
                case XMLStreamConstants.CHARACTERS:
                    final NodeId nodeId = (NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
                    textOffset += extractor.beforeCharacters();
                    offsets.add(textOffset, nodeId);
                    textOffset += extractor.characters(reader.getXMLText());
                    break;
            }
        }
    } catch (final IOException | XMLStreamException e) {
        LOG.warn("Problem found while serializing XML: {}", e.getMessage(), e);
    }
    // Retrieve the Analyzer for the NodeProxy that was used for
    // indexing and querying.
    Analyzer analyzer = idxConf.getAnalyzer();
    if (analyzer == null) {
        // Otherwise use system default Lucene analyzer (from conf.xml)
        // to tokenize the text and find matching query terms.
        analyzer = index.getDefaultAnalyzer();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Analyzer: {} for path: {}", analyzer, path);
    }
    final String str = extractor.getText().toString();
    try (final Reader reader = new StringReader(str);
         final TokenStream tokenStream = analyzer.tokenStream(null, reader)) {
        tokenStream.reset();
        final MarkableTokenFilter stream = new MarkableTokenFilter(tokenStream);
        while (stream.incrementToken()) {
            String text = stream.getAttribute(CharTermAttribute.class).toString();
            final Query query = termMap.get(text);
            if (query != null) {
                // Phrase queries need special handling: mark only the whole phrase, not
                // single words which may also occur elsewhere in the document
                if (query instanceof PhraseQuery) {
                    final PhraseQuery phraseQuery = (PhraseQuery) query;
                    final Term[] terms = phraseQuery.getTerms();
                    if (text.equals(terms[0].text())) {
                        // Scan the following text and collect tokens to see
                        // if they are part of the phrase.
                        stream.mark();
                        int t = 1;
                        final List<State> stateList = new ArrayList<>(terms.length);
                        stateList.add(stream.captureState());
                        while (stream.incrementToken() && t < terms.length) {
                            text = stream.getAttribute(CharTermAttribute.class).toString();
                            if (text.equals(terms[t].text())) {
                                stateList.add(stream.captureState());
                                if (++t == terms.length) {
                                    break;
                                }
                            } else {
                                // stream.reset();
                                break;
                            }
                        }
                        if (stateList.size() == terms.length) {
                            // we indeed have a phrase match. record the offsets of its terms.
                            int lastIdx = -1;
                            for (int i = 0; i < terms.length; i++) {
                                stream.restoreState(stateList.get(i));
                                final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                                final int idx = offsets.getIndex(offsetAttr.startOffset());
                                final NodeId nodeId = offsets.ids[idx];
                                final Offset offset = nodesWithMatch.get(nodeId);
                                if (offset != null) {
                                    if (lastIdx == idx) {
                                        offset.setEndOffset(offsetAttr.endOffset() - offsets.offsets[idx]);
                                    } else {
                                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                                    }
                                } else {
                                    nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                                }
                                lastIdx = idx;
                            }
                        }
                    }
                    // End of phrase handling
                } else {
                    final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                    final int idx = offsets.getIndex(offsetAttr.startOffset());
                    final NodeId nodeId = offsets.ids[idx];
                    final Offset offset = nodesWithMatch.get(nodeId);
                    if (offset != null) {
                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                    } else {
                        nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                    }
                }
            }
        }
    } catch (final IOException e) {
        LOG.warn("Problem found while serializing XML: {}", e.getMessage(), e);
    }
}
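The second half of scanMatches() relies on standard Lucene token attributes: CharTermAttribute carries the term text and OffsetAttribute the character offsets into the extracted string, which OffsetList then maps back to text-node NodeIds. Below is a standalone sketch of that attribute handling, assuming a recent Lucene version and substituting StandardAnalyzer for the index-configured analyzer.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class TokenOffsetDemo {
    public static void main(String[] args) throws IOException {
        final String text = "only the phrase should be marked";
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream(null, text)) {
            final CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            final OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // startOffset()/endOffset() index into the original string; scanMatches()
                // uses them to locate each matching term inside the right text node.
                System.out.printf("%s [%d,%d]%n", term.toString(), offset.startOffset(), offset.endOffset());
            }
            ts.end();
        }
    }
}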
Use of org.exist.storage.NodePath in project exist by eXist-db.
Class Field, method highlightMatches().
/**
 * Highlight matches in field content using the analyzer defined for the field.
 *
 * @param fieldName the name of the field
 * @param proxy node on which the field is defined
 * @param match the lucene match attached to the node
 * @param text the content of the field
 * @return a sequence of exist:field elements containing the field content with matches enclosed in exist:match
 * @throws XPathException in case of error
 * @throws IOException in case of a lucene error
 */
private Sequence highlightMatches(final String fieldName, final NodeProxy proxy, final LuceneMatch match, final Sequence text) throws XPathException, IOException {
    final LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID);
    final Map<Object, Query> terms = index.getTerms(match.getQuery());
    final NodePath path = LuceneMatchListener.getPath(proxy);
    final LuceneConfig config = index.getLuceneConfig(context.getBroker(), proxy.getDocumentSet());
    LuceneIndexConfig idxConf = config.getConfig(path).next();
    if (idxConf == null) {
        // no lucene index: no fields to highlight
        return Sequence.EMPTY_SEQUENCE;
    }
    final Analyzer analyzer = idxConf.getAnalyzer();
    context.pushDocumentContext();
    try {
        final MemTreeBuilder builder = context.getDocumentBuilder();
        builder.startDocument();
        final InMemoryNodeSet result = new InMemoryNodeSet(text.getItemCount());
        for (final SequenceIterator si = text.iterate(); si.hasNext(); ) {
            final int nodeNr = builder.startElement(Namespaces.EXIST_NS, "field", "exist:field", null);
            final String content = si.nextItem().getStringValue();
            int currentPos = 0;
            try (final Reader reader = new StringReader(content);
                 final TokenStream tokenStream = analyzer.tokenStream(fieldName, reader)) {
                tokenStream.reset();
                final MarkableTokenFilter stream = new MarkableTokenFilter(tokenStream);
                while (stream.incrementToken()) {
                    String token = stream.getAttribute(CharTermAttribute.class).toString();
                    final Query query = terms.get(token);
                    if (query != null) {
                        if (match.getQuery() instanceof PhraseQuery) {
                            final Term[] phraseTerms = ((PhraseQuery) match.getQuery()).getTerms();
                            if (token.equals(phraseTerms[0].text())) {
                                // Scan the following text and collect tokens to see
                                // if they are part of the phrase.
                                stream.mark();
                                int t = 1;
                                OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);
                                final int startOffset = offset.startOffset();
                                int endOffset = offset.endOffset();
                                while (stream.incrementToken() && t < phraseTerms.length) {
                                    token = stream.getAttribute(CharTermAttribute.class).toString();
                                    if (token.equals(phraseTerms[t].text())) {
                                        offset = stream.getAttribute(OffsetAttribute.class);
                                        endOffset = offset.endOffset();
                                        t++;
                                        if (t == phraseTerms.length) {
                                            break;
                                        }
                                    } else {
                                        break;
                                    }
                                }
                                if (t == phraseTerms.length) {
                                    if (currentPos < startOffset) {
                                        builder.characters(content.substring(currentPos, startOffset));
                                    }
                                    builder.startElement(Namespaces.EXIST_NS, "match", "exist:match", null);
                                    builder.characters(content.substring(startOffset, endOffset));
                                    builder.endElement();
                                    currentPos = endOffset;
                                }
                            }
                            // End of phrase handling
                        } else {
                            final OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);
                            if (currentPos < offset.startOffset()) {
                                builder.characters(content.substring(currentPos, offset.startOffset()));
                            }
                            builder.startElement(Namespaces.EXIST_NS, "match", "exist:match", null);
                            builder.characters(content.substring(offset.startOffset(), offset.endOffset()));
                            builder.endElement();
                            currentPos = offset.endOffset();
                        }
                    }
                }
            }
            if (currentPos < content.length() - 1) {
                builder.characters(content.substring(currentPos));
            }
            builder.endElement();
            result.add(builder.getDocument().getNode(nodeNr));
        }
        return result;
    } finally {
        context.popDocumentContext();
    }
}
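Stripped of the MemTreeBuilder calls, the highlighting is an offset merge over the field content: copy unmatched text verbatim and wrap every matched character range in an exist:match element. A simplified, string-based sketch of that idea follows; highlight() and the offset pairs are hypothetical, and the real method emits in-memory DOM nodes instead of markup text.

// Hypothetical illustration: matchOffsets holds {startOffset, endOffset} pairs
// in document order, as reported by Lucene's OffsetAttribute.
static String highlight(final String content, final int[][] matchOffsets) {
    final StringBuilder out = new StringBuilder();
    int currentPos = 0;
    for (final int[] m : matchOffsets) {
        if (currentPos < m[0]) {
            out.append(content, currentPos, m[0]);          // text before the match
        }
        out.append("<exist:match>").append(content, m[0], m[1]).append("</exist:match>");
        currentPos = m[1];
    }
    out.append(content.substring(currentPos));              // trailing text
    return out.toString();
}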
Use of org.exist.storage.NodePath in project exist by eXist-db.
Class OptimizeFieldPragma, method tryRewriteToFields().
private Expression tryRewriteToFields(LocationStep locationStep, final Predicate[] preds, NodePath contextPath, Sequence contextSequence) throws XPathException {
    // without context path, we cannot rewrite the entire query
    if (contextPath != null) {
        final List<Predicate> notOptimizable = new ArrayList<>(preds.length);
        List<RangeIndexConfig> configs = getConfigurations(contextSequence);
        // walk through the predicates attached to the current location step
        // check if expression can be optimized
        final Map<Predicate, List<Expression>> predicateArgs = new IdentityHashMap<>(preds.length);
        for (final Predicate pred : preds) {
            List<Expression> args = null;
            SequenceConstructor arg0 = null;
            SequenceConstructor arg1 = null;
            if (pred.getLength() != 1) {
                // can only optimize predicates with one expression
                notOptimizable.add(pred);
                continue;
            }
            Expression innerExpr = pred.getExpression(0);
            List<LocationStep> steps = RangeQueryRewriter.getStepsToOptimize(innerExpr);
            if (steps == null) {
                notOptimizable.add(pred);
                continue;
            }
            // compute left hand path
            NodePath innerPath = RangeQueryRewriter.toNodePath(steps);
            if (innerPath == null) {
                notOptimizable.add(pred);
                continue;
            }
            NodePath path = new NodePath(contextPath);
            path.append(innerPath);
            if (path.length() > 0) {
                // find all complex range index configurations matching the full path to the predicate expression
                final List<ComplexRangeIndexConfigElement> rices = findConfigurations(path, configs);
                // config with most conditions for path comes first
                rices.sort(ComplexRangeIndexConfigElement.NUM_CONDITIONS_COMPARATOR);
                if (rices.isEmpty()) {
                    notOptimizable.add(pred);
                    continue;
                }
                // found index configuration with sub-fields
                int predIdx = -1;
                for (int i = 0; i < preds.length; i++) {
                    if (preds[i] == pred) {
                        predIdx = i;
                        break;
                    }
                }
                final Predicate[] precedingPreds = Arrays.copyOf(preds, predIdx);
                final ArrayList<Predicate> matchedPreds = new ArrayList<>();
                ComplexRangeIndexConfigElement rice = null;
                for (ComplexRangeIndexConfigElement testRice : rices) {
                    if (testRice.getNumberOfConditions() > 0) {
                        // find a config element where the conditions match preceding predicates
                        matchedPreds.clear();
                        for (Predicate precedingPred : precedingPreds) {
                            if (testRice.findCondition(precedingPred)) {
                                matchedPreds.add(precedingPred);
                            }
                        }
                        if (matchedPreds.size() == testRice.getNumberOfConditions()) {
                            // all conditions matched
                            rice = testRice;
                            // if any preceding predicates found to be part of a condition for this config
                            // had been matched to another config before, remove them, as this is the correct match
                            predicateArgs.keySet().removeAll(matchedPreds);
                            // also do not re-add them after optimizing
                            notOptimizable.removeAll(matchedPreds);
                            break;
                        }
                    } else {
                        // no conditional configs for this node path, take the first one found if any
                        rice = testRice;
                    }
                }
                if (rice != null && rice.getNodePath().match(contextPath)) {
                    // check for a matching sub-path and retrieve field information
                    RangeIndexConfigField field = rice.getField(path);
                    if (field != null) {
                        if (args == null) {
                            // initialize args
                            args = new ArrayList<>(4);
                            arg0 = new SequenceConstructor(context);
                            args.add(arg0);
                            arg1 = new SequenceConstructor(context);
                            args.add(arg1);
                        }
                        // field is added to the sequence in first parameter
                        arg0.add(new LiteralValue(context, new StringValue(field.getName())));
                        // operator
                        arg1.add(new LiteralValue(context, new StringValue(RangeQueryRewriter.getOperator(innerExpr).toString())));
                        // append right hand expression as additional parameter
                        args.add(getKeyArg(innerExpr));
                        // store the collected arguments with a reference to the predicate
                        // so they can be removed if a better match is found (if the predicate happens to be
                        // one of the conditions for the following predicate)
                        predicateArgs.put(pred, args);
                    } else {
                        notOptimizable.add(pred);
                        continue;
                    }
                } else {
                    notOptimizable.add(pred);
                    continue;
                }
            } else {
                notOptimizable.add(pred);
                continue;
            }
        }
        if (!predicateArgs.isEmpty()) {
            // the entire filter expression can be replaced
            // create range:field-equals function
            FieldLookup func = new FieldLookup(context, FieldLookup.signatures[0]);
            func.setFallback(locationStep);
            func.setLocation(locationStep.getLine(), locationStep.getColumn());
            if (predicateArgs.size() == 1) {
                func.setArguments(predicateArgs.entrySet().iterator().next().getValue());
            } else {
                final List<Expression> mergedArgs = new ArrayList<>(predicateArgs.size() * 4);
                final SequenceConstructor arg0 = new SequenceConstructor(context);
                mergedArgs.add(arg0);
                final SequenceConstructor arg1 = new SequenceConstructor(context);
                mergedArgs.add(arg1);
                for (final List<Expression> args : predicateArgs.values()) {
                    arg0.add(args.get(0));
                    arg1.add(args.get(1));
                    mergedArgs.addAll(args.subList(2, args.size()));
                }
                func.setArguments(mergedArgs);
            }
            Expression optimizedExpr = new InternalFunctionCall(func);
            if (!notOptimizable.isEmpty()) {
                final FilteredExpression filtered = new FilteredExpression(context, optimizedExpr);
                for (Predicate pred : notOptimizable) {
                    filtered.addPredicate(pred);
                }
                optimizedExpr = filtered;
            }
            return optimizedExpr;
        }
    }
    return null;
}
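The path arithmetic in this rewriter is worth isolating: the relative path of the predicate's left-hand steps is appended to a copy of the context path, and that combined path is what the field configuration is looked up by. A minimal sketch using only NodePath operations that appear above; the QNames and the configured path are made-up examples.

// Hypothetical context: //book[title eq 'x'] with a complex range index configured on book.
final NodePath contextPath = new NodePath();
contextPath.addComponent(new QName("book", "http://example.org/ns"));

final NodePath innerPath = new NodePath();       // left-hand side of the predicate
innerPath.addComponent(new QName("title", "http://example.org/ns"));

final NodePath full = new NodePath(contextPath); // copy, so the context path stays intact
full.append(innerPath);                          // now /book/title

final NodePath configured = new NodePath();      // path of a made-up index configuration
configured.addComponent(new QName("book", "http://example.org/ns"));

if (full.length() > 0 && configured.match(contextPath)) {
    // the predicate on title could be answered from the index field for /book/title
}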