Use of org.apache.lucene.index.TermEnum in the project Jackrabbit (Apache):
the class MatchAllScorer, method calculateDocFilter.
/**
 * Calculates a BitSet filter that includes all the nodes
 * that have content in properties according to the field name
 * passed in the constructor of this MatchAllScorer.
 * <p>
 * The computed BitSet is cached per index reader and field name, so
 * repeated executions of this scorer avoid re-scanning the term index.
 *
 * @param cache the per-query cache used to look up / store the filter.
 * @throws IOException if an error occurs while reading from
 * the search index.
 */
@SuppressWarnings({ "unchecked" })
private void calculateDocFilter(PerQueryCache cache) throws IOException {
    Map<String, BitSet> readerCache = (Map<String, BitSet>) cache.get(MatchAllScorer.class, reader);
    if (readerCache == null) {
        readerCache = new HashMap<String, BitSet>();
        cache.put(MatchAllScorer.class, reader, readerCache);
    }
    // get BitSet for field
    docFilter = readerCache.get(field);
    if (docFilter != null) {
        // use cached BitSet;
        return;
    }
    // otherwise calculate new
    docFilter = new BitSet(reader.maxDoc());
    // we match all terms
    String namedValue = FieldNames.createNamedValue(field, "");
    TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, namedValue));
    try {
        TermDocs docs = reader.termDocs();
        try {
            // Iterate over all terms that share the named-value prefix and
            // mark every document they occur in.
            // Note: the original compared field() with == (identity), which
            // only works because old Lucene interns field names; equals() is
            // the defensively correct comparison and costs the same here.
            Term current = terms.term();
            while (current != null
                    && FieldNames.PROPERTIES.equals(current.field())
                    && current.text().startsWith(namedValue)) {
                docs.seek(terms);
                while (docs.next()) {
                    docFilter.set(docs.doc());
                }
                terms.next();
                current = terms.term();
            }
        } finally {
            docs.close();
        }
    } finally {
        terms.close();
    }
    // put BitSet into cache
    readerCache.put(field, docFilter);
}
Use of org.apache.lucene.index.TermEnum in the project Jackrabbit (Apache):
the class EquiJoin, method getPropertyTerms.
/**
 * Collects all index terms for the given property name.
 *
 * @param property the property name to enumerate terms for.
 * @return a set of entries mapping each matching {@link Term} to the
 *         property value encoded after the named-value prefix.
 * @throws IOException if an error occurs while reading from the index.
 */
private Set<Map.Entry<Term, String>> getPropertyTerms(String property) throws IOException {
    Map<Term, String> map = new HashMap<Term, String>();
    Term prefix = new Term(FieldNames.PROPERTIES, FieldNames.createNamedValue(property, ""));
    TermEnum terms = reader.terms(prefix);
    // fix: the TermEnum was previously never closed, leaking the
    // enumerator on every call (sibling code in this project closes it).
    try {
        do {
            Term term = terms.term();
            // stop as soon as we leave the field or the named-value prefix
            if (term == null || !term.field().equals(prefix.field()) || !term.text().startsWith(prefix.text())) {
                break;
            }
            map.put(term, term.text().substring(prefix.text().length()));
        } while (terms.next());
    } finally {
        terms.close();
    }
    return map.entrySet();
}
Use of org.apache.lucene.index.TermEnum in the project zm-mailbox (Zimbra):
the class LuceneViewer, method dumpTerms.
/**
 * Dumps every term in the index in {@code Term.compareTo()} order and,
 * for each term accepted by the configured filters, the
 * {@code <document, frequency, <position>*>} tuples describing where
 * and how often the term occurs.
 *
 * @throws IOException if the index cannot be read.
 */
private void dumpTerms() throws IOException {
    outputBanner("Terms (in Term.compareTo() order)");
    TermEnum terms = mIndexReader.terms();
    // fix: close the enumerators in finally blocks; previously an
    // IOException mid-dump leaked both the TermEnum and TermPositions.
    try {
        int order = 0;
        while (terms.next()) {
            order++;
            Term term = terms.term();
            String field = term.field();
            String text = term.text();
            if (!wantThisTerm(field, text)) {
                continue;
            }
            outputLn(order + " " + field + ": " + text);
            /*
             * for each term, print the
             * <document, frequency, <position>* > tuples for a term.
             *
             * document: document in which the Term appears
             * frequency: number of time the Term appears in the document
             * position: position for each appearance in the document
             *
             * e.g. doc.add(new Field("field", "one two three two four five", Field.Store.YES, Field.Index.ANALYZED));
             * then the tuple for Term("field", "two") in this document would be like:
             * 88, 2, <2, 4>
             * where
             * 88 is the document number
             * 2 is the frequency this term appear in the document
             * <2, 4> are the positions for each appearance in the document
             */
            // by TermPositions
            outputLn(" document, frequency, <position>*");
            // keep track of docs that appear in all terms that are filtered in.
            Set<Integer> docNums = null;
            if (hasFilters()) {
                docNums = new HashSet<Integer>();
            }
            TermPositions termPos = mIndexReader.termPositions(term);
            try {
                while (termPos.next()) {
                    int docNum = termPos.doc();
                    int freq = termPos.freq();
                    if (docNums != null) {
                        docNums.add(docNum);
                    }
                    output(" " + docNum + ", " + freq + ", <");
                    boolean first = true;
                    for (int f = 0; f < freq; f++) {
                        int positionInDoc = termPos.nextPosition();
                        if (!first) {
                            output(" ");
                        } else {
                            first = false;
                        }
                        output(positionInDoc + "");
                    }
                    outputLn(">");
                }
            } finally {
                termPos.close();
            }
            if (docNums != null) {
                computeDocsIntersection(docNums);
            }
            outputLn();
            // progress heartbeat for large indexes
            if (order % 1000 == 0) {
                mConsole.debug("Dumped " + order + " terms");
            }
        }
    } finally {
        terms.close();
    }
}
Use of org.apache.lucene.index.TermEnum in the project BigBlueButton:
the class Index, method startIndex.
/**
 * Deletes any previously indexed documents carrying the given uid and
 * then opens a fresh IndexWriter for re-indexing.
 * <p>
 * Errors are logged and swallowed, preserving the original best-effort
 * behavior: the writer is opened even if the delete pass failed.
 *
 * @param uid identifier of the documents to remove before indexing.
 */
public void startIndex(String uid) {
    try {
        IndexReader.unlock(FSDirectory.getDirectory(ConfigHandler.indexPath));
        if (logger.isInfoEnabled()) {
            logger.info("index file path " + ConfigHandler.indexPath);
        }
        reader = IndexReader.open(ConfigHandler.indexPath);
        try {
            TermEnum uidIter = reader.terms(new Term("uid"));
            try {
                // fix: restrict the scan to the "uid" field. The original
                // walked the whole remaining term dictionary and could have
                // deleted documents of other fields whose term text happened
                // to match the uid.
                while (uidIter.term() != null && "uid".equals(uidIter.term().field())) {
                    if (uid.equalsIgnoreCase(uidIter.term().text())) {
                        reader.deleteDocuments(uidIter.term());
                    }
                    uidIter.next();
                }
            } finally {
                uidIter.close(); // fix: enumerator was never closed
            }
        } finally {
            reader.close(); // fix: reader leaked when an exception occurred
        }
    } catch (CorruptIndexException e) {
        logger.error("corrupt index at " + ConfigHandler.indexPath, e);
    } catch (LockObtainFailedException e) {
        logger.error("could not obtain index lock", e);
    } catch (IOException e) {
        logger.error("I/O error while cleaning index", e);
    }
    try {
        writer = new IndexWriter(ConfigHandler.indexPath, new StandardAnalyzer(), new IndexWriter.MaxFieldLength(1000000));
    } catch (CorruptIndexException e) {
        logger.error("corrupt index at " + ConfigHandler.indexPath, e);
    } catch (LockObtainFailedException e) {
        logger.error("could not obtain index lock", e);
    } catch (IOException e) {
        logger.error("I/O error while opening IndexWriter", e);
    }
}
Use of org.apache.lucene.index.TermEnum in the project Jackrabbit Oak (Apache):
the class RepositoryUpgrade, method assertNoLongNames.
/**
 * Scans the source repository's Lucene index for node local names that
 * may exceed the target store's length limit and fails the migration
 * when one is found.
 *
 * @throws RepositoryException if a too-long name is found, or if the
 *         index cannot be read.
 */
void assertNoLongNames() throws RepositoryException {
    Session session = source.getRepository().login(null, null);
    boolean longNameFound = false;
    try {
        IndexReader reader = IndexAccessor.getReader(source);
        if (reader == null) {
            return;
        }
        TermEnum terms = reader.terms(new Term(FieldNames.LOCAL_NAME));
        // fix: close the TermEnum/TermDocs; both were previously leaked.
        try {
            // NOTE(review): calling next() before the first term() skips the
            // term the enumerator is initially positioned on — presumably
            // intentional since Term(LOCAL_NAME) has empty text; confirm.
            while (terms.next()) {
                Term t = terms.term();
                if (!FieldNames.LOCAL_NAME.equals(t.field())) {
                    continue;
                }
                String name = t.text();
                if (nameMayBeTooLong(name)) {
                    // resolve one document carrying this name and check the
                    // actual node path against the real length limit
                    TermDocs docs = reader.termDocs(t);
                    try {
                        if (docs.next()) {
                            int docId = docs.doc();
                            String uuid = reader.document(docId).get(FieldNames.UUID);
                            Node n = session.getNodeByIdentifier(uuid);
                            if (isNameTooLong(n.getName(), n.getParent().getPath())) {
                                logger.warn("Name too long: {}", n.getPath());
                                longNameFound = true;
                            }
                        }
                    } finally {
                        docs.close();
                    }
                }
            }
        } finally {
            terms.close();
        }
    } catch (IOException e) {
        throw new RepositoryException(e);
    } finally {
        session.logout();
    }
    if (longNameFound) {
        logger.error("Node with a long name has been found. Please fix the content or rerun the migration with {} option.", SKIP_NAME_CHECK);
        throw new RepositoryException("Node with a long name has been found.");
    }
}
Aggregations