use of org.apache.lucene.index.TermEnum in project greplin-lucene-utils by Cue.
the class PhrasePrefixQuery method getPrefixTerms.
/**
 * Collects every term of this query's field whose text begins with the given prefix.
 * <p>
 * The enumeration is positioned at the first term at or after {@code (field, prefix)}
 * and is walked until a term no longer matches the field and prefix, or until the
 * enumeration is exhausted.
 *
 * @param prefix the prefix
 * @param reader the index reader
 * @return the matching terms in enumeration order, or {@code null} when nothing matches
 * @throws IOException if IO errors are encountered
 */
private Term[] getPrefixTerms(final String prefix, final IndexReader reader) throws IOException {
    TermEnum cursor = reader.terms(new Term(this.field, prefix));
    List<Term> matches = Lists.newArrayList();
    try {
        boolean more = true;
        while (more) {
            Term candidate = cursor.term();
            boolean accepted = candidate != null
                    && candidate.text().startsWith(prefix)
                    && candidate.field().equals(this.field);
            if (!accepted) {
                // First non-matching (or missing) term ends the scan.
                break;
            }
            matches.add(candidate);
            more = cursor.next();
        }
    } finally {
        cursor.close();
    }
    return matches.isEmpty() ? null : matches.toArray(new Term[matches.size()]);
}
use of org.apache.lucene.index.TermEnum in project zm-mailbox by Zimbra.
the class RemoteMailQueue method summarize.
/**
 * Walks every term in the index and records, per summarized queue attribute,
 * how many documents carry each term value.
 * <p>
 * Only terms whose field name maps to one of the attributes {@code addr},
 * {@code host}, {@code from}, {@code to}, {@code fromdomain}, {@code todomain},
 * {@code reason} or {@code received} are summarized. Terms with a count of zero
 * are not added. The term enumeration is always closed before this method returns.
 *
 * @param result the search result whose {@code sitems} map receives the summary items
 * @param indexReader the reader to enumerate terms from
 * @throws IOException if reading from the index fails
 */
private void summarize(SearchResult result, IndexReader indexReader) throws IOException {
    TermEnum terms = indexReader.terms();
    try {
        boolean hasDeletions = indexReader.hasDeletions();
        do {
            Term term = terms.term();
            if (term != null) {
                String field = term.field();
                if (field != null && field.length() > 0) {
                    QueueAttr attr = QueueAttr.valueOf(field);
                    if (attr == QueueAttr.addr || attr == QueueAttr.host || attr == QueueAttr.from || attr == QueueAttr.to || attr == QueueAttr.fromdomain || attr == QueueAttr.todomain || attr == QueueAttr.reason || attr == QueueAttr.received) {
                        List<SummaryItem> list = result.sitems.get(attr);
                        if (list == null) {
                            list = new LinkedList<SummaryItem>();
                            result.sitems.put(attr, list);
                        }
                        // docFreq() is only used when the reader reports no deletions;
                        // otherwise the postings are walked and deleted documents skipped.
                        int count = hasDeletions ? countLiveDocs(indexReader, term) : terms.docFreq();
                        if (count > 0) {
                            list.add(new SummaryItem(term.text(), count));
                        }
                    }
                }
            }
        } while (terms.next());
    } finally {
        // Release the term enumeration even if enumeration fails part-way.
        terms.close();
    }
}

/**
 * Counts the documents containing {@code term} that are not marked deleted,
 * closing the postings enumeration when done.
 *
 * @param indexReader the reader to read postings from
 * @param term the term whose documents are counted
 * @return the number of non-deleted documents containing the term
 * @throws IOException if reading from the index fails
 */
private static int countLiveDocs(IndexReader indexReader, Term term) throws IOException {
    int count = 0;
    TermDocs termDocs = indexReader.termDocs(term);
    try {
        while (termDocs.next()) {
            if (!indexReader.isDeleted(termDocs.doc())) {
                count++;
            }
        }
    } finally {
        termDocs.close();
    }
    return count;
}
use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.
the class IndexMigration method migrate.
/**
 * Checks whether the given <code>index</code> still uses the old separator
 * char and, if so, rewrites it into a fresh directory that then replaces
 * the original one.
 * <p>
 * No migration happens when the index format version is at least V3, or
 * when the first term found at or after the start of the properties field
 * does not contain <code>oldSeparatorChar</code>.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index, or if
 *         the migrated directory cannot be renamed to the index name.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar) throws IOException {
    Directory sourceDir = index.getDirectory();
    log.debug("Checking {} ...", sourceDir);
    ReadOnlyIndexReader checkReader = index.getReadOnlyIndexReader();
    try {
        int currentVersion = IndexFormatVersion.getVersion(checkReader).getVersion();
        if (currentVersion >= IndexFormatVersion.V3.getVersion()) {
            // index was created with Jackrabbit 1.5 or higher, nothing to do
            log.debug("IndexFormatVersion >= V3, no migration needed");
            return;
        }
        // assert: the index holds at least one node here, otherwise its
        // format version would already be V3 or later
        TermEnum propertyTerms = checkReader.terms(new Term(FieldNames.PROPERTIES, ""));
        try {
            String firstText = propertyTerms.term().text();
            boolean usesOldSeparator = firstText.indexOf(oldSeparatorChar) != -1;
            if (!usesOldSeparator) {
                log.debug("Index already migrated");
                return;
            }
        } finally {
            propertyTerms.close();
        }
    } finally {
        checkReader.release();
        index.releaseWriterAndReaders();
    }

    // reaching this point means the index has to be rewritten
    log.debug("Index requires migration {}", sourceDir);
    String targetName = index.getName() + "_v36";
    if (directoryManager.hasDirectory(targetName)) {
        // remove any existing directory with the migration name first
        directoryManager.delete(targetName);
    }
    Directory targetDir = directoryManager.getDirectory(targetName);
    final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
    config.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
    config.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    try {
        IndexWriter writer = new IndexWriter(targetDir, config);
        try {
            IndexReader migratingReader = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar);
            try {
                writer.addIndexes(migratingReader);
                writer.forceMerge(1);
                writer.close();
            } finally {
                migratingReader.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        targetDir.close();
    }

    // swap the migrated directory in under the original index name
    directoryManager.delete(index.getName());
    boolean moved = directoryManager.rename(targetName, index.getName());
    if (!moved) {
        throw new IOException("failed to move migrated directory " + targetDir);
    }
    log.info("Migrated " + index.getName());
}
use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.
the class SharedFieldCache method getValueIndex.
/**
* Creates a <code>ValueIndex</code> for a <code>field</code> and a term
* <code>prefix</code>. The term prefix acts as the property name for the
* shared <code>field</code>.
* <p>
* This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
* <p>
* If <code>lookup</code> already yields a value index for the (unwrapped)
* reader, field and prefix, that instance is returned unchanged. Otherwise
* the index is built by enumerating the terms of <code>field</code> that
* start with <code>prefix</code>, handed to <code>store</code>, and returned.
* A <code>ReadOnlyIndexReader</code> is replaced by its base reader before
* anything else happens.
*
* @param reader the <code>IndexReader</code>.
* @param field name of the shared field.
* @param prefix the property name, will be used as term prefix.
* @return a ValueIndex that contains the field values and order
* information.
* @throws IOException if an error occurs while reading from the index.
* @throws RuntimeException if the reader reports at least one document but
* the term enumeration positioned at <code>field</code>/<code>prefix</code>
* has no current term.
*/
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {
// Work against the underlying base reader when handed a read-only wrapper.
if (reader instanceof ReadOnlyIndexReader) {
reader = ((ReadOnlyIndexReader) reader).getBase();
}
// Interned so that the identity comparison against term.field() in the
// loop below can succeed.
// NOTE(review): relies on Lucene handing out interned field names - confirm.
field = field.intern();
ValueIndex ret = lookup(reader, field, prefix);
if (ret == null) {
final int maxDocs = reader.maxDoc();
// One slot per document number; holds either a single value or a
// ComparableArray once multi-valued mode has been entered.
Comparable<?>[] retArray = new Comparable<?>[maxDocs];
// Document number -> term position of the single value stored for it.
// Only maintained while usingSimpleComparable is true; set to null afterwards.
Map<Integer, Integer> positions = new HashMap<Integer, Integer>();
boolean usingSimpleComparable = true;
// Number of (term, document) postings visited.
int setValues = 0;
if (maxDocs > 0) {
IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
TermDocs termDocs;
byte[] payload = null;
int type;
if (hasPayloads) {
// Positions are required to read payloads, so a TermPositions
// enumerator is used; it is cast back to TermPositions below.
termDocs = reader.termPositions();
// Initial buffer handed to getPayload(); replaced by whatever that call returns.
payload = new byte[1];
} else {
termDocs = reader.termDocs();
}
TermEnum termEnum = reader.terms(new Term(field, prefix));
try {
if (termEnum.term() == null) {
throw new RuntimeException("no terms in field " + field);
}
do {
Term term = termEnum.term();
// Stop as soon as the enumeration leaves the field or the prefix range.
if (term.field() != field || !term.text().startsWith(prefix)) {
break;
}
final String value = termValueAsString(term, prefix);
termDocs.seek(term);
while (termDocs.next()) {
// Defaults used when the index has no payloads or this posting carries none.
int termPosition = 0;
type = PropertyType.UNDEFINED;
if (hasPayloads) {
TermPositions termPos = (TermPositions) termDocs;
termPosition = termPos.nextPosition();
if (termPos.isPayloadAvailable()) {
payload = termPos.getPayload(payload, 0);
type = PropertyMetaData.fromByteArray(payload).getPropertyType();
}
}
setValues++;
Comparable<?> v = getValue(value, type);
int doc = termDocs.doc();
Comparable<?> ca = retArray[doc];
if (ca == null) {
// First value seen for this document.
if (usingSimpleComparable) {
// Store the bare value and remember its position so it can
// be wrapped later if multi-valued mode is entered.
positions.put(doc, termPosition);
retArray[doc] = v;
} else {
retArray[doc] = new ComparableArray(v, termPosition);
}
} else {
if (ca instanceof ComparableArray) {
// Already in multi-valued mode: add to the existing array.
// NOTE(review): the return value of insert() is ignored here but
// assigned further below - presumably insert() returns the same
// instance; confirm.
((ComparableArray) ca).insert(v, termPosition);
} else {
// A second value arrived for a document while still in simple mode:
// wrap every bare value recorded so far in a ComparableArray using
// its remembered position, then switch modes for good.
for (int pos : positions.keySet()) {
retArray[pos] = new ComparableArray(retArray[pos], positions.get(pos));
}
positions = null;
usingSimpleComparable = false;
// retArray[doc] held a bare value recorded in positions, so the loop
// above has just replaced it with a ComparableArray.
ComparableArray caNew = (ComparableArray) retArray[doc];
retArray[doc] = caNew.insert(v, termPosition);
}
}
}
} while (termEnum.next());
} finally {
termDocs.close();
termEnum.close();
}
}
// Also reached when maxDocs is 0, in which case retArray is empty and setValues is 0.
ValueIndex value = new ValueIndex(retArray, setValues);
store(reader, field, prefix, value);
return value;
}
return ret;
}
use of org.apache.lucene.index.TermEnum in project jackrabbit by apache.
the class ChainedTermEnumTest method createTermEnum.
/**
 * Builds an in-memory index with <code>numTerms</code> single-field documents
 * and returns a term enumeration over it.
 * <p>
 * Document <code>i</code> has one un-analyzed, un-stored value
 * <code>prefix + i</code> in the field named <code>"field"</code>. If the
 * enumeration has no current term it is advanced once. The reader the
 * enumeration came from is closed in a <code>finally</code> block before
 * the enumeration reaches the caller.
 *
 * @param prefix the text each term value starts with.
 * @param numTerms the number of documents (and term values) to create.
 * @return the term enumeration over the created index.
 * @throws IOException if writing or reading the index fails.
 */
protected TermEnum createTermEnum(String prefix, int numTerms) throws IOException {
    Directory ramDir = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    IndexWriter indexWriter = new IndexWriter(ramDir, config);
    try {
        int n = 0;
        while (n < numTerms) {
            Document document = new Document();
            Field valueField = new Field("field", true, prefix + n, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
            document.add(valueField);
            indexWriter.addDocument(document);
            n++;
        }
    } finally {
        indexWriter.close();
    }

    IndexReader indexReader = IndexReader.open(ramDir);
    try {
        TermEnum termEnum = indexReader.terms();
        boolean positioned = termEnum.term() != null;
        if (!positioned) {
            // move onto the first term
            termEnum.next();
        }
        return termEnum;
    } finally {
        indexReader.close();
    }
}
Aggregations