Search in sources :

Example 1 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.

the class MatchAllScorer method calculateDocFilter.

/**
 * Calculates a BitSet filter that includes all the nodes
 * that have content in properties according to the field name
 * passed in the constructor of this MatchAllScorer.
 * <p>
 * The computed BitSet is stored in the given cache, in a map keyed by
 * field name that is registered under {@code MatchAllScorer.class} and
 * the current reader. If a BitSet for the field is already present it is
 * reused and the index is not read.
 *
 * @param cache the per-query cache used to look up and store the
 *              field-name to BitSet map for the current reader.
 * @throws IOException if an error occurs while reading from
 *                     the search index.
 */
@SuppressWarnings({ "unchecked" })
private void calculateDocFilter(PerQueryCache cache) throws IOException {
    Map<String, BitSet> readerCache = (Map<String, BitSet>) cache.get(MatchAllScorer.class, reader);
    if (readerCache == null) {
        readerCache = new HashMap<String, BitSet>();
        cache.put(MatchAllScorer.class, reader, readerCache);
    }
    // get BitSet for field
    docFilter = readerCache.get(field);
    if (docFilter != null) {
        // use cached BitSet;
        return;
    }
    // otherwise calculate new
    docFilter = new BitSet(reader.maxDoc());
    // we match all terms
    String namedValue = FieldNames.createNamedValue(field, "");
    TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, namedValue));
    try {
        TermDocs docs = reader.termDocs();
        try {
            Term term = terms.term();
            // Compare field names with equals() rather than ==, so the check does
            // not silently depend on both strings being interned.
            while (term != null
                    && FieldNames.PROPERTIES.equals(term.field())
                    && term.text().startsWith(namedValue)) {
                docs.seek(terms);
                while (docs.next()) {
                    docFilter.set(docs.doc());
                }
                // term() yields null once the enumeration is exhausted, which ends the loop
                terms.next();
                term = terms.term();
            }
        } finally {
            docs.close();
        }
    } finally {
        terms.close();
    }
    // put BitSet into cache
    readerCache.put(field, docFilter);
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) BitSet(java.util.BitSet) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.

the class EquiJoin method getPropertyTerms.

/**
 * Collects every term of the {@code FieldNames.PROPERTIES} field whose text
 * starts with the named-value prefix created for the given property.
 *
 * @param property the property name passed to
 *                 {@code FieldNames.createNamedValue(property, "")} to build the prefix.
 * @return the entries of a map from each matching term to its text with
 *         the prefix removed; empty if no term matches.
 * @throws IOException if an error occurs while reading from the index.
 */
private Set<Map.Entry<Term, String>> getPropertyTerms(String property) throws IOException {
    Map<Term, String> map = new HashMap<Term, String>();
    Term prefix = new Term(FieldNames.PROPERTIES, FieldNames.createNamedValue(property, ""));
    String prefixText = prefix.text();
    TermEnum terms = reader.terms(prefix);
    try {
        // The enumeration is examined before the first next() call, hence do/while.
        do {
            Term term = terms.term();
            if (term == null || !term.field().equals(prefix.field()) || !term.text().startsWith(prefixText)) {
                break;
            }
            map.put(term, term.text().substring(prefixText.length()));
        } while (terms.next());
    } finally {
        // always release the enumeration, even when reading the index fails
        terms.close();
    }
    return map.entrySet();
}
Also used : HashMap(java.util.HashMap) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum)

Example 3 with TermEnum

use of org.apache.lucene.index.TermEnum in project zm-mailbox by Zimbra.

the class LuceneViewer method dumpTerms.

/**
 * Writes every term of the index that passes {@code wantThisTerm}, followed
 * by one line per document listing the document number, the term frequency
 * and the positions of the term in that document.
 * <p>
 * When {@code hasFilters()} is true, the document numbers seen for each
 * dumped term are handed to {@code computeDocsIntersection}.
 *
 * @throws IOException if an error occurs while reading from the index.
 */
private void dumpTerms() throws IOException {
    outputBanner("Terms (in Term.compareTo() order)");
    TermEnum terms = mIndexReader.terms();
    try {
        int order = 0;
        while (terms.next()) {
            // order counts every term, including those filtered out below
            order++;
            Term term = terms.term();
            String field = term.field();
            String text = term.text();
            if (!wantThisTerm(field, text)) {
                continue;
            }
            outputLn(order + " " + field + ": " + text);
            /*
             * for each term, print the
             * <document, frequency, <position>* > tuples for a term.
             *
             * document:  document in which the Term appears
             * frequency: number of times the Term appears in the document
             * position:  position for each appearance in the document
             *
             * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED));
             *      then the tuple for Term("field", "two") in this document would be like:
             *      88, 2, <2, 4>
             *      where
             *      88 is the document number
             *      2  is the frequency this term appears in the document
             *      <2, 4> are the positions for each appearance in the document
             */
            // by TermPositions
            outputLn("    document, frequency, <position>*");
            // keep track of docs that appear in all terms that are filtered in.
            Set<Integer> docNums = null;
            if (hasFilters()) {
                docNums = new HashSet<Integer>();
            }
            TermPositions termPos = mIndexReader.termPositions(term);
            try {
                while (termPos.next()) {
                    int docNum = termPos.doc();
                    int freq = termPos.freq();
                    if (docNums != null) {
                        docNums.add(docNum);
                    }
                    output("    " + docNum + ", " + freq + ", <");
                    for (int f = 0; f < freq; f++) {
                        int positionInDoc = termPos.nextPosition();
                        // single space between positions, none before the first
                        if (f > 0) {
                            output(" ");
                        }
                        output(positionInDoc + "");
                    }
                    outputLn(">");
                }
            } finally {
                // release the positions enumeration even if reading fails part-way
                termPos.close();
            }
            if (docNums != null) {
                computeDocsIntersection(docNums);
            }
            outputLn();
            if (order % 1000 == 0) {
                mConsole.debug("Dumped " + order + " terms");
            }
        }
    } finally {
        // release the term enumeration even if the dump is aborted by an exception
        terms.close();
    }
}
Also used : Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum) TermPositions(org.apache.lucene.index.TermPositions)

Example 4 with TermEnum

use of org.apache.lucene.index.TermEnum in project bigbluebutton by bigbluebutton.

the class Index method startIndex.

/**
 * Prepares the index for (re)indexing the given uid: unlocks the index
 * directory, deletes every document whose {@code uid} term equals
 * {@code uid} (ignoring case), and then opens the index writer.
 * <p>
 * I/O failures in either phase are logged and not propagated; a failure
 * while purging does not prevent the attempt to open the writer.
 *
 * @param uid the uid whose existing documents are removed from the index.
 */
public void startIndex(String uid) {
    try {
        IndexReader.unlock(FSDirectory.getDirectory(ConfigHandler.indexPath));
        if (logger.isInfoEnabled()) {
            logger.info("index file path " + ConfigHandler.indexPath);
        }
        reader = IndexReader.open(ConfigHandler.indexPath);
        try {
            TermEnum uidIter = reader.terms(new Term("uid"));
            try {
                Term term = uidIter.term();
                // Terms are ordered by field first, so the first non-"uid" field
                // marks the end of the uid terms. Stopping here avoids deleting
                // documents via a same-text term of an unrelated field.
                while (term != null && "uid".equals(term.field())) {
                    if (uid.equalsIgnoreCase(term.text())) {
                        reader.deleteDocuments(term);
                    }
                    uidIter.next();
                    term = uidIter.term();
                }
            } finally {
                uidIter.close();
            }
        } finally {
            // close the reader on every path so it is not left open after a failure
            reader.close();
        }
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOExceptions too
        logger.error("Failed to remove existing index entries for uid " + uid, e);
    }
    try {
        writer = new IndexWriter(ConfigHandler.indexPath, new StandardAnalyzer(), new IndexWriter.MaxFieldLength(1000000));
    } catch (IOException e) {
        logger.error("Failed to open index writer at " + ConfigHandler.indexPath, e);
    }
}
Also used : LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) TermEnum(org.apache.lucene.index.TermEnum)

Example 5 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit-oak by apache.

the class RepositoryUpgrade method assertNoLongNames.

/**
 * Scans the {@code FieldNames.LOCAL_NAME} terms of the source index for
 * names that may be too long. For each candidate name, the node of the
 * first indexed document is loaded and its name is checked against its
 * parent path. Every offending node is logged.
 * <p>
 * Does nothing if {@code IndexAccessor.getReader(source)} returns null.
 *
 * @throws RepositoryException if at least one node with a too long name
 *                             was found, if reading the index fails, or
 *                             if the repository cannot be accessed.
 */
void assertNoLongNames() throws RepositoryException {
    Session session = source.getRepository().login(null, null);
    boolean longNameFound = false;
    try {
        IndexReader reader = IndexAccessor.getReader(source);
        if (reader == null) {
            return;
        }
        // NOTE(review): the reader is not closed here; ownership of the reader returned
        // by IndexAccessor is not visible from this method - confirm who closes it.
        TermEnum terms = reader.terms(new Term(FieldNames.LOCAL_NAME));
        try {
            // reader.terms(Term) is already positioned on the first term at or after the
            // requested one, so inspect the current term before calling next().
            do {
                Term t = terms.term();
                // Terms are ordered by field, so the first foreign field ends the scan.
                if (t == null || !FieldNames.LOCAL_NAME.equals(t.field())) {
                    break;
                }
                String name = t.text();
                if (nameMayBeTooLong(name)) {
                    TermDocs docs = reader.termDocs(t);
                    try {
                        if (docs.next()) {
                            int docId = docs.doc();
                            String uuid = reader.document(docId).get(FieldNames.UUID);
                            Node n = session.getNodeByIdentifier(uuid);
                            if (isNameTooLong(n.getName(), n.getParent().getPath())) {
                                logger.warn("Name too long: {}", n.getPath());
                                longNameFound = true;
                            }
                        }
                    } finally {
                        docs.close();
                    }
                }
            } while (terms.next());
        } finally {
            terms.close();
        }
    } catch (IOException e) {
        throw new RepositoryException(e);
    } finally {
        session.logout();
    }
    if (longNameFound) {
        logger.error("Node with a long name has been found. Please fix the content or rerun the migration with {} option.", SKIP_NAME_CHECK);
        throw new RepositoryException("Node with a long name has been found.");
    }
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) Node(javax.jcr.Node) IndexReader(org.apache.lucene.index.IndexReader) RepositoryException(javax.jcr.RepositoryException) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) TermEnum(org.apache.lucene.index.TermEnum) QValueConstraint(org.apache.jackrabbit.spi.QValueConstraint) Session(javax.jcr.Session)

Aggregations

TermEnum (org.apache.lucene.index.TermEnum)12 Term (org.apache.lucene.index.Term)11 TermDocs (org.apache.lucene.index.TermDocs)5 IOException (java.io.IOException)3 HashMap (java.util.HashMap)3 IndexReader (org.apache.lucene.index.IndexReader)3 IndexWriter (org.apache.lucene.index.IndexWriter)3 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 TermPositions (org.apache.lucene.index.TermPositions)2 Directory (org.apache.lucene.store.Directory)2 ArrayList (java.util.ArrayList)1 BitSet (java.util.BitSet)1 Map (java.util.Map)1 WeakHashMap (java.util.WeakHashMap)1 Node (javax.jcr.Node)1 RepositoryException (javax.jcr.RepositoryException)1 Session (javax.jcr.Session)1 QValueConstraint (org.apache.jackrabbit.spi.QValueConstraint)1 Document (org.apache.lucene.document.Document)1