Use of org.apache.lucene.index.TermDocs in project zm-mailbox by Zimbra.
From the class RemoteMailQueue, the method summarize:
private void summarize(SearchResult result, IndexReader indexReader) throws IOException {
    TermEnum terms = indexReader.terms();
    boolean hasDeletions = indexReader.hasDeletions();
    do {
        Term term = terms.term();
        if (term != null) {
            String field = term.field();
            if (field != null && field.length() > 0) {
                QueueAttr attr = QueueAttr.valueOf(field);
                if (attr == QueueAttr.addr || attr == QueueAttr.host || attr == QueueAttr.from ||
                        attr == QueueAttr.to || attr == QueueAttr.fromdomain || attr == QueueAttr.todomain ||
                        attr == QueueAttr.reason || attr == QueueAttr.received) {
                    List<SummaryItem> list = result.sitems.get(attr);
                    if (list == null) {
                        list = new LinkedList<SummaryItem>();
                        result.sitems.put(attr, list);
                    }
                    int count = 0;
                    if (hasDeletions) {
                        // with deletions present, docFreq() would over-count,
                        // so walk the postings and skip deleted documents
                        TermDocs termDocs = indexReader.termDocs(term);
                        while (termDocs.next()) {
                            if (!indexReader.isDeleted(termDocs.doc())) {
                                count++;
                            }
                        }
                    } else {
                        // no deletions: the enum's document frequency is exact
                        count = terms.docFreq();
                    }
                    if (count > 0) {
                        list.add(new SummaryItem(term.text(), count));
                    }
                }
            }
        }
    } while (terms.next());
}
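The pattern worth isolating from this method: a term's document frequency still counts deleted documents, so once a reader has deletions the postings must be walked with TermDocs and each hit checked against IndexReader.isDeleted(). Below is a minimal, self-contained sketch of just that counting step against the pre-4.0 Lucene API. The class and method names are ours, not Zimbra's; unlike the snippet above, the sketch also closes the TermDocs and takes the no-deletions shortcut via IndexReader.docFreq(Term) rather than TermEnum.docFreq().

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

public class TermDocCounter {

    /**
     * Counts the documents matching a term, skipping deleted ones.
     * When the reader has no deletions, docFreq() is the cheaper path.
     */
    static int liveDocCount(IndexReader reader, Term term) throws IOException {
        if (!reader.hasDeletions()) {
            return reader.docFreq(term);
        }
        int count = 0;
        TermDocs termDocs = reader.termDocs(term);
        try {
            while (termDocs.next()) {
                if (!reader.isDeleted(termDocs.doc())) {
                    count++;
                }
            }
        } finally {
            termDocs.close();
        }
        return count;
    }
}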
Use of org.apache.lucene.index.TermDocs in project jackrabbit by Apache.
From the class AbstractExcerpt, the method getExcerpt:
/**
 * {@inheritDoc}
 */
public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException {
    IndexReader reader = index.getIndexReader();
    try {
        checkRewritten(reader);
        Term idTerm = TermFactory.createUUIDTerm(id.toString());
        TermDocs tDocs = reader.termDocs(idTerm);
        int docNumber;
        Document doc;
        try {
            if (tDocs.next()) {
                docNumber = tDocs.doc();
                doc = reader.document(docNumber);
            } else {
                // node not found in index
                return null;
            }
        } finally {
            tDocs.close();
        }
        Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT);
        if (fields.length == 0) {
            log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName());
            SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
            exProvider.init(query, index);
            return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
        }
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = 0; i < fields.length; i++) {
            if (fields[i].stringValue().length() == 0) {
                continue;
            }
            text.append(separator);
            text.append(fields[i].stringValue());
            separator = " ";
        }
        TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT);
        if (tfv instanceof TermPositionVector) {
            return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize);
        } else {
            log.debug("No TermPositionVector on Fulltext field.");
            return null;
        }
    } finally {
        Util.closeOrRelease(reader);
    }
}
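Resolving a single document through a unique-identifier term is a recurring TermDocs idiom: seek, take the first hit, close. A self-contained sketch of that lookup, under the assumption that at most one document carries the term (the class and method names here are illustrative, not Jackrabbit API):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

public class UniqueTermLookup {

    /**
     * Resolves the single document that carries a unique identifier term,
     * or returns null if the term is absent from the index.
     */
    static Document findByUniqueTerm(IndexReader reader, Term idTerm) throws IOException {
        TermDocs tDocs = reader.termDocs(idTerm);
        try {
            if (tDocs.next()) {
                return reader.document(tDocs.doc());
            }
            return null; // no document indexed under this identifier
        } finally {
            tDocs.close();
        }
    }
}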
Use of org.apache.lucene.index.TermDocs in project jackrabbit by Apache.
From the class CachingIndexReader, the method getParent:
/**
 * Returns the <code>DocId</code> of the parent of <code>n</code> or
 * {@link DocId#NULL} if <code>n</code> does not have a parent
 * (<code>n</code> is the root node).
 *
 * @param n the document number.
 * @param deleted the documents that should be regarded as deleted.
 * @return the <code>DocId</code> of <code>n</code>'s parent.
 * @throws IOException if an error occurs while reading from the index.
 */
DocId getParent(int n, BitSet deleted) throws IOException {
    DocId parent;
    boolean existing = false;
    int parentDocNum = inSegmentParents[n];
    if (parentDocNum != -1) {
        parent = DocId.create(parentDocNum);
    } else {
        parent = foreignParentDocIds.get(n);
    }
    if (parent != null) {
        existing = true;
        // check if valid and reset if necessary
        if (!parent.isValid(deleted)) {
            if (log.isDebugEnabled()) {
                log.debug(parent + " not valid anymore.");
            }
            parent = null;
        }
    }
    if (parent == null) {
        int plainDocId = -1;
        Document doc = document(n, FieldSelectors.UUID_AND_PARENT);
        String[] parentUUIDs = doc.getValues(FieldNames.PARENT);
        if (parentUUIDs.length == 0 || parentUUIDs[0].length() == 0) {
            // root node
            parent = DocId.NULL;
        } else {
            if (shareableNodes.get(n)) {
                parent = DocId.create(parentUUIDs);
            } else {
                if (!existing) {
                    Term id = TermFactory.createUUIDTerm(parentUUIDs[0]);
                    TermDocs docs = termDocs(id);
                    try {
                        while (docs.next()) {
                            if (!deleted.get(docs.doc())) {
                                plainDocId = docs.doc();
                                parent = DocId.create(plainDocId);
                                break;
                            }
                        }
                    } finally {
                        docs.close();
                    }
                }
                // DocId was invalid. thus, only allowed to create DocId from uuid
                if (parent == null) {
                    parent = DocId.create(parentUUIDs[0]);
                }
            }
        }
        // finally put to cache
        if (plainDocId != -1) {
            // PlainDocId
            inSegmentParents[n] = plainDocId;
        } else {
            // UUIDDocId
            foreignParentDocIds.put(n, parent);
            if (existing) {
                // there was an existing parent reference in
                // inSegmentParents, which was invalid and is replaced
                // with a UUIDDocId (points to a foreign segment).
                // mark as unknown
                inSegmentParents[n] = -1;
            }
        }
    }
    return parent;
}
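A detail worth noting: the inner TermDocs loop filters against the caller-supplied deleted BitSet rather than the reader's own deletion state, which lets the caller overlay deletions that are not yet committed to the index. A sketch of that inner idiom in isolation (class and method names are ours):

import java.io.IOException;
import java.util.BitSet;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

public class FilteredTermLookup {

    /**
     * Returns the first document matching the term that is not marked
     * in the caller-supplied deletion set, or -1 if none remains.
     */
    static int firstLiveDoc(IndexReader reader, Term term, BitSet deleted) throws IOException {
        TermDocs docs = reader.termDocs(term);
        try {
            while (docs.next()) {
                if (!deleted.get(docs.doc())) {
                    return docs.doc();
                }
            }
        } finally {
            docs.close();
        }
        return -1;
    }
}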
Use of org.apache.lucene.index.TermDocs in project jackrabbit by Apache.
From the class IndexingQueue, the method initialize:
/**
 * Initializes the indexing queue.
 *
 * @param index the multi index this indexing queue belongs to.
 * @throws IOException if an error occurs while reading from the index.
 */
void initialize(MultiIndex index) throws IOException {
    if (initialized) {
        throw new IllegalStateException("already initialized");
    }
    // check index for nodes that need to be reindexed
    CachingMultiIndexReader reader = index.getIndexReader();
    try {
        TermDocs tDocs = reader.termDocs(new Term(FieldNames.REINDEXING_REQUIRED, ""));
        try {
            while (tDocs.next()) {
                queueStore.addUUID(reader.document(tDocs.doc(), FieldSelectors.UUID).get(FieldNames.UUID));
            }
        } finally {
            tDocs.close();
        }
    } finally {
        reader.release();
    }
    String[] uuids = queueStore.getPending();
    for (String uuid : uuids) {
        try {
            Document doc = index.createDocument(new NodeId(uuid));
            pendingDocuments.put(uuid, doc);
            log.debug("added node {}. New size of indexing queue: {}", uuid, pendingDocuments.size());
        } catch (IllegalArgumentException e) {
            log.warn("Invalid UUID in indexing queue store: " + uuid);
        } catch (RepositoryException e) {
            // node does not exist anymore
            log.debug("Node with uuid {} does not exist anymore", uuid);
            queueStore.removeUUID(uuid);
        }
    }
    initialized = true;
}
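The scan over FieldNames.REINDEXING_REQUIRED shows TermDocs used as a cheap "marker term" query: every document carrying the term is visited and one stored field is read back, with no scoring or query machinery involved. A generic sketch of that scan against a plain IndexReader, with the marker term and stored field name supplied by the caller (class and method names are hypothetical):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

public class MarkerTermScan {

    /**
     * Collects a stored field from every document carrying a marker term,
     * mirroring the reindexing scan above.
     */
    static List<String> collectField(IndexReader reader, Term marker, String storedField)
            throws IOException {
        List<String> values = new ArrayList<String>();
        TermDocs tDocs = reader.termDocs(marker);
        try {
            while (tDocs.next()) {
                values.add(reader.document(tDocs.doc()).get(storedField));
            }
        } finally {
            tDocs.close();
        }
        return values;
    }
}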
Use of org.apache.lucene.index.TermDocs in project jackrabbit by Apache.
From the class SharedFieldCache, the method getValueIndex:
/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader the <code>IndexReader</code>.
 * @param field name of the shared field.
 * @param prefix the property name, will be used as term prefix.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix) throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix);
    if (ret == null) {
        final int maxDocs = reader.maxDoc();
        Comparable<?>[] retArray = new Comparable<?>[maxDocs];
        Map<Integer, Integer> positions = new HashMap<Integer, Integer>();
        boolean usingSimpleComparable = true;
        int setValues = 0;
        if (maxDocs > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // field was interned above, so identity comparison is safe here
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    final String value = termValueAsString(term, prefix);
                    termDocs.seek(term);
                    while (termDocs.next()) {
                        int termPosition = 0;
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            termPosition = termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        Comparable<?> v = getValue(value, type);
                        int doc = termDocs.doc();
                        Comparable<?> ca = retArray[doc];
                        if (ca == null) {
                            if (usingSimpleComparable) {
                                // put simple value on the queue
                                positions.put(doc, termPosition);
                                retArray[doc] = v;
                            } else {
                                retArray[doc] = new ComparableArray(v, termPosition);
                            }
                        } else {
                            if (ca instanceof ComparableArray) {
                                ((ComparableArray) ca).insert(v, termPosition);
                            } else {
                                // switch from simple Comparable values to ComparableArray
                                for (int pos : positions.keySet()) {
                                    retArray[pos] = new ComparableArray(retArray[pos], positions.get(pos));
                                }
                                positions = null;
                                usingSimpleComparable = false;
                                ComparableArray caNew = (ComparableArray) retArray[doc];
                                retArray[doc] = caNew.insert(v, termPosition);
                            }
                        }
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        store(reader, field, prefix, value);
        return value;
    }
    return ret;
}
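The driving structure here is a prefix walk: reader.terms(new Term(field, prefix)) positions a TermEnum at the first term at or after the prefix, and a single reusable TermDocs is re-seeked for each matching term until the enum walks past the field or prefix range. A stripped-down sketch of that walk, without the payload and caching logic (the class name and the println are ours; seek(TermEnum) reuses the enum's current position, which implementations may optimize over seeking by Term):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

public class PrefixTermWalk {

    /**
     * Visits every (term, doc) pair for terms of one field that start with
     * a prefix, reusing a single TermDocs via seek() as the method above does.
     */
    static void walkPrefix(IndexReader reader, String field, String prefix) throws IOException {
        TermEnum termEnum = reader.terms(new Term(field, prefix));
        TermDocs termDocs = reader.termDocs();
        try {
            do {
                Term term = termEnum.term();
                if (term == null || !term.field().equals(field)
                        || !term.text().startsWith(prefix)) {
                    break; // walked past the field/prefix range
                }
                termDocs.seek(termEnum);
                while (termDocs.next()) {
                    System.out.println(term.text() + " -> doc " + termDocs.doc());
                }
            } while (termEnum.next());
        } finally {
            termDocs.close();
            termEnum.close();
        }
    }
}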