Search in sources :

Example 6 with TermEnum

use of org.apache.lucene.index.TermEnum in project greplin-lucene-utils by Cue.

The method getPrefixTerms of the class PhrasePrefixQuery.

/**
 * Collects the terms of this query's field whose text starts with the given prefix.
 *
 * <p>The enumeration obtained from {@code reader.terms(new Term(field, prefix))} is walked
 * forward and stops at the first term that is missing, does not start with the prefix,
 * or belongs to a different field.
 *
 * @param prefix the prefix every returned term's text must start with
 * @param reader the index reader whose terms are enumerated
 * @return the matching terms in enumeration order, or {@code null} if no term matches
 * @throws IOException if IO errors are encountered
 */
private Term[] getPrefixTerms(final String prefix, final IndexReader reader) throws IOException {
    final TermEnum cursor = reader.terms(new Term(this.field, prefix));
    final List<Term> matches = Lists.newArrayList();
    try {
        while (true) {
            final Term current = cursor.term();
            // Any term that fails one of these checks ends the scan.
            if (current == null || !current.text().startsWith(prefix) || !current.field().equals(this.field)) {
                break;
            }
            matches.add(current);
            if (!cursor.next()) {
                break;
            }
        }
    } finally {
        cursor.close();
    }
    // Callers receive null, not an empty array, when nothing matched.
    return matches.isEmpty() ? null : matches.toArray(new Term[matches.size()]);
}
Also used : Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum)

Example 7 with TermEnum

use of org.apache.lucene.index.TermEnum in project zm-mailbox by Zimbra.

The method summarize of the class RemoteMailQueue.

/**
 * Adds per-term document counts for the summarized queue attributes to {@code result}.
 *
 * <p>Every term of the index is visited. For a term whose field names one of the
 * summarized {@code QueueAttr} values, a {@code SummaryItem} holding the term text and the
 * number of documents containing it is appended to that attribute's list in
 * {@code result.sitems}; the list is created on first use. Terms with a count of zero are
 * not added.
 *
 * <p>The term enumeration is always closed before this method returns.
 *
 * @param result the search result whose {@code sitems} map receives the summary items
 * @param indexReader the reader whose terms are summarized
 * @throws IOException if reading the index fails
 */
private void summarize(SearchResult result, IndexReader indexReader) throws IOException {
    TermEnum terms = indexReader.terms();
    try {
        boolean hasDeletions = indexReader.hasDeletions();
        do {
            Term term = terms.term();
            if (term == null) {
                // In a do/while, continue jumps to the terms.next() condition.
                continue;
            }
            String field = term.field();
            if (field == null || field.length() == 0) {
                continue;
            }
            QueueAttr attr = QueueAttr.valueOf(field);
            if (!isSummarizedAttr(attr)) {
                continue;
            }
            List<SummaryItem> list = result.sitems.get(attr);
            if (list == null) {
                list = new LinkedList<SummaryItem>();
                result.sitems.put(attr, list);
            }
            // With deletions present the postings are scanned so deleted documents are
            // left out; otherwise the enumeration's document frequency is used directly.
            int count = hasDeletions ? countLiveDocs(indexReader, term) : terms.docFreq();
            if (count > 0) {
                list.add(new SummaryItem(term.text(), count));
            }
        } while (terms.next());
    } finally {
        terms.close();
    }
}

/** Returns whether terms of the given attribute are included in the summary. */
private static boolean isSummarizedAttr(QueueAttr attr) {
    return attr == QueueAttr.addr
            || attr == QueueAttr.host
            || attr == QueueAttr.from
            || attr == QueueAttr.to
            || attr == QueueAttr.fromdomain
            || attr == QueueAttr.todomain
            || attr == QueueAttr.reason
            || attr == QueueAttr.received;
}

/** Counts the documents containing {@code term} that the reader does not report as deleted. */
private static int countLiveDocs(IndexReader indexReader, Term term) throws IOException {
    TermDocs termDocs = indexReader.termDocs(term);
    try {
        int live = 0;
        while (termDocs.next()) {
            if (!indexReader.isDeleted(termDocs.doc())) {
                live++;
            }
        }
        return live;
    } finally {
        termDocs.close();
    }
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum)

Example 8 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.

The method migrate of the class IndexMigration.

/**
 * Checks whether the given <code>index</code> needs to be migrated and, if so,
 * migrates it.
 * <p>
 * No migration happens when the index format version is at least V3, or when
 * the text of the first term returned for the properties field does not
 * contain <code>oldSeparatorChar</code>. Otherwise the index is rewritten
 * into a directory named <code>&lt;index name&gt;_v36</code>, the original
 * directory is deleted and the new directory is renamed to the index name.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index, or if
 *         the migrated directory cannot be renamed.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar) throws IOException {
    Directory sourceDir = index.getDirectory();
    log.debug("Checking {} ...", sourceDir);
    ReadOnlyIndexReader checkReader = index.getReadOnlyIndexReader();
    try {
        int currentVersion = IndexFormatVersion.getVersion(checkReader).getVersion();
        if (currentVersion >= IndexFormatVersion.V3.getVersion()) {
            // index was created with Jackrabbit 1.5 or higher,
            // so there is nothing to migrate
            log.debug("IndexFormatVersion >= V3, no migration needed");
            return;
        }
        // assert: there is at least one node in the index, otherwise the
        //         index format version would be at least V3
        TermEnum propertyTerms = checkReader.terms(new Term(FieldNames.PROPERTIES, ""));
        try {
            Term firstTerm = propertyTerms.term();
            boolean usesOldSeparator = firstTerm.text().indexOf(oldSeparatorChar) != -1;
            if (!usesOldSeparator) {
                log.debug("Index already migrated");
                return;
            }
        } finally {
            propertyTerms.close();
        }
    } finally {
        checkReader.release();
        index.releaseWriterAndReaders();
    }

    // reaching this point means the index must be migrated
    log.debug("Index requires migration {}", sourceDir);
    String tempName = index.getName() + "_v36";
    // remove any directory with the temporary name before creating it anew
    if (directoryManager.hasDirectory(tempName)) {
        directoryManager.delete(tempName);
    }
    Directory tempDir = directoryManager.getDirectory(tempName);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
    writerConfig.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
    writerConfig.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    try {
        IndexWriter migrationWriter = new IndexWriter(tempDir, writerConfig);
        try {
            IndexReader migratingReader = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar);
            try {
                migrationWriter.addIndexes(migratingReader);
                migrationWriter.forceMerge(1);
                migrationWriter.close();
            } finally {
                migratingReader.close();
            }
        } finally {
            migrationWriter.close();
        }
    } finally {
        tempDir.close();
    }

    // swap the migrated directory into the place of the original one
    directoryManager.delete(index.getName());
    boolean renamed = directoryManager.rename(tempName, index.getName());
    if (!renamed) {
        throw new IOException("failed to move migrated directory " + tempDir);
    }
    log.info("Migrated " + index.getName());
}
Also used : KeepOnlyLastCommitDeletionPolicy(org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy) UpgradeIndexMergePolicy(org.apache.lucene.index.UpgradeIndexMergePolicy) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) TermEnum(org.apache.lucene.index.TermEnum) LogByteSizeMergePolicy(org.apache.lucene.index.LogByteSizeMergePolicy) IndexWriter(org.apache.lucene.index.IndexWriter) FilterIndexReader(org.apache.lucene.index.FilterIndexReader) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 9 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.

The method getValueIndex of the class SharedFieldCache.

/**
     * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
     * <code>prefix</code>. The term prefix acts as the property name for the
     * shared <code>field</code>.
     * <p>
     * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
     * <p>
     * If <code>lookup</code> already yields a value index for the reader,
     * field and prefix, that instance is returned. Otherwise a new one is
     * built from the terms of <code>field</code> that start with
     * <code>prefix</code>, passed to <code>store</code> and returned.
     *
     * @param reader     the <code>IndexReader</code>.
     * @param field      name of the shared field.
     * @param prefix     the property name, will be used as term prefix.
     * @return a ValueIndex that contains the field values and order
     *         information.
     * @throws IOException if an error occurs while reading from the index.
     * @throws RuntimeException if the reader has documents but the term
     *         enumeration for the field and prefix yields no term at all.
     */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {
    // work on the underlying reader when handed a read-only wrapper
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    // interned so that the identity comparison against term.field() in the
    // loop below can match
    // NOTE(review): relies on Term.field() returning interned strings — confirm
    field = field.intern();
    // presumably a cache lookup; a non-null result is returned unchanged
    ValueIndex ret = lookup(reader, field, prefix);
    if (ret == null) {
        final int maxDocs = reader.maxDoc();
        // one slot per document number; holds either a single value or a
        // ComparableArray once a document has more than one value
        Comparable<?>[] retArray = new Comparable<?>[maxDocs];
        // document number -> term position, recorded only while simple
        // (non-array) values are being stored
        Map<Integer, Integer> positions = new HashMap<Integer, Integer>();
        // true until the first document with a second value is seen
        boolean usingSimpleComparable = true;
        // counts every (term, document) pair that was processed
        int setValues = 0;
        if (maxDocs > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            // term positions are only requested for V3 and later indexes,
            // where the payload is read to determine the property type
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // stop at the first term of another field or without the prefix
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    final String value = termValueAsString(term, prefix);
                    termDocs.seek(term);
                    while (termDocs.next()) {
                        // defaults used when no payload information is read
                        int termPosition = 0;
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            // only the first position of the term in this document is read
                            termPosition = termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        Comparable<?> v = getValue(value, type);
                        int doc = termDocs.doc();
                        Comparable<?> ca = retArray[doc];
                        if (ca == null) {
                            // first value seen for this document
                            if (usingSimpleComparable) {
                                // put simple value on the queue
                                positions.put(doc, termPosition);
                                retArray[doc] = v;
                            } else {
                                retArray[doc] = new ComparableArray(v, termPosition);
                            }
                        } else {
                            // the document already has at least one value
                            if (ca instanceof ComparableArray) {
                                ((ComparableArray) ca).insert(v, termPosition);
                            } else {
                                // Comparable to ComparableArray: wrap every simple
                                // value stored so far, using its recorded position
                                for (int pos : positions.keySet()) {
                                    retArray[pos] = new ComparableArray(retArray[pos], positions.get(pos));
                                }
                                // from here on only ComparableArray entries are
                                // stored, so the position map is dropped
                                positions = null;
                                usingSimpleComparable = false;
                                // retArray[doc] was wrapped by the loop above
                                ComparableArray caNew = (ComparableArray) retArray[doc];
                                retArray[doc] = caNew.insert(v, termPosition);
                            }
                        }
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        // presumably caches the new index for later lookup calls
        store(reader, field, prefix, value);
        return value;
    }
    return ret;
}
Also used : HashMap(java.util.HashMap) WeakHashMap(java.util.WeakHashMap) TermDocs(org.apache.lucene.index.TermDocs) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum) TermPositions(org.apache.lucene.index.TermPositions)

Example 10 with TermEnum

use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.

The method createTermEnum of the class ChainedTermEnumTest.

/**
 * Builds an in-memory index with <code>numTerms</code> documents and returns a
 * term enumeration over it.
 * <p>
 * Document <code>i</code> carries a single un-analyzed, un-stored field named
 * <code>"field"</code> with the value <code>prefix + i</code>. If the returned
 * enumeration has no current term, it is advanced once.
 * <p>
 * Note that the reader the enumeration was obtained from is closed before the
 * enumeration is returned.
 *
 * @param prefix   the prefix of every generated term text.
 * @param numTerms the number of documents (and terms) to create.
 * @return the term enumeration over the created index.
 * @throws IOException if writing or reading the index fails.
 */
protected TermEnum createTermEnum(String prefix, int numTerms) throws IOException {
    Directory ramDir = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
    IndexWriter indexWriter = new IndexWriter(ramDir, writerConfig);
    try {
        int n = 0;
        while (n < numTerms) {
            Field termField = new Field("field", true, prefix + n, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
            Document document = new Document();
            document.add(termField);
            indexWriter.addDocument(document);
            n++;
        }
    } finally {
        indexWriter.close();
    }

    IndexReader indexReader = IndexReader.open(ramDir);
    try {
        TermEnum termEnum = indexReader.terms();
        boolean positioned = termEnum.term() != null;
        if (!positioned) {
            // position at first term
            termEnum.next();
        }
        return termEnum;
    } finally {
        indexReader.close();
    }
}
Also used : Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) TermEnum(org.apache.lucene.index.TermEnum) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

TermEnum (org.apache.lucene.index.TermEnum)12 Term (org.apache.lucene.index.Term)11 TermDocs (org.apache.lucene.index.TermDocs)5 IOException (java.io.IOException)3 HashMap (java.util.HashMap)3 IndexReader (org.apache.lucene.index.IndexReader)3 IndexWriter (org.apache.lucene.index.IndexWriter)3 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 TermPositions (org.apache.lucene.index.TermPositions)2 Directory (org.apache.lucene.store.Directory)2 ArrayList (java.util.ArrayList)1 BitSet (java.util.BitSet)1 Map (java.util.Map)1 WeakHashMap (java.util.WeakHashMap)1 Node (javax.jcr.Node)1 RepositoryException (javax.jcr.RepositoryException)1 Session (javax.jcr.Session)1 QValueConstraint (org.apache.jackrabbit.spi.QValueConstraint)1 Document (org.apache.lucene.document.Document)1