Use of org.apache.lucene.index.TermPositionVector in the Apache Jackrabbit project.
Class AbstractExcerpt, method createTermPositionVector.
/**
 * Builds an in-memory <code>TermPositionVector</code> by running the given
 * text through the index's text analyzer and recording the start and end
 * offset of every token, grouped by term text.
 * <p>
 * Only offsets are captured: <code>getTermPositions(int)</code> on the
 * returned vector always yields <code>null</code>, and the field name is
 * the empty string.
 *
 * @param text the text.
 * @return a <code>TermPositionVector</code> for the given text.
 */
private TermPositionVector createTermPositionVector(String text) {
    // term -> TermVectorOffsetInfo[]
    // a sorted map is used so that its key array can be binary searched below
    final SortedMap<String, TermVectorOffsetInfo[]> termMap = new TreeMap<String, TermVectorOffsetInfo[]>();
    Reader r = new StringReader(text);
    TokenStream ts = index.getTextAnalyzer().tokenStream("", r);
    try {
        while (ts.incrementToken()) {
            OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
            TermAttribute term = ts.getAttribute(TermAttribute.class);
            String termText = term.term();
            TermVectorOffsetInfo[] info = termMap.get(termText);
            if (info == null) {
                // first occurrence of this term
                info = new TermVectorOffsetInfo[1];
            } else {
                // grow by one slot, keeping the offsets collected so far
                TermVectorOffsetInfo[] tmp = info;
                info = new TermVectorOffsetInfo[tmp.length + 1];
                System.arraycopy(tmp, 0, info, 0, tmp.length);
            }
            info[info.length - 1] = new TermVectorOffsetInfo(offset.startOffset(), offset.endOffset());
            termMap.put(termText, info);
        }
        ts.end();
    } catch (IOException e) {
        // should never happen, we are reading from a string
    } finally {
        // always release the stream, even when tokenizing fails part-way
        try {
            ts.close();
        } catch (IOException e) {
            // should never happen, we are reading from a string
        }
    }
    return new TermPositionVector() {
        // terms in sorted order; a term's position in this array is its index
        private String[] terms = termMap.keySet().toArray(new String[termMap.size()]);

        public int[] getTermPositions(int index) {
            // positions are not recorded, only offsets
            return null;
        }

        public TermVectorOffsetInfo[] getOffsets(int index) {
            TermVectorOffsetInfo[] info = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
            if (index >= 0 && index < terms.length) {
                info = termMap.get(terms[index]);
            }
            return info;
        }

        public String getField() {
            return "";
        }

        public int size() {
            return terms.length;
        }

        public String[] getTerms() {
            return terms;
        }

        public int[] getTermFrequencies() {
            // frequency of a term == number of offsets recorded for it
            int[] freqs = new int[terms.length];
            for (int i = 0; i < terms.length; i++) {
                freqs[i] = termMap.get(terms[i]).length;
            }
            return freqs;
        }

        public int indexOf(String term) {
            int res = Arrays.binarySearch(terms, term);
            // binarySearch reports a miss as a negative insertion point; normalize to -1
            return res >= 0 ? res : -1;
        }

        public int[] indexesOf(String[] searchTerms, int start, int len) {
            int[] res = new int[len];
            for (int i = 0; i < len; i++) {
                // the list of terms to look up begins at 'start' within searchTerms
                res[i] = indexOf(searchTerms[start + i]);
            }
            return res;
        }
    };
}
Use of org.apache.lucene.index.TermPositionVector in the Apache Jackrabbit project.
Class AbstractExcerpt, method getExcerpt.
/**
 * {@inheritDoc}
 * <p>
 * Looks up the index document for the node, joins the non-empty values of
 * its stored fulltext fields with single spaces and creates the excerpt
 * from the term position vector of the fulltext field. When the fulltext
 * field is not stored the call is delegated to a
 * <code>SimpleExcerptProvider</code>.
 *
 * @return the excerpt, or <code>null</code> if the node is not found in
 *         the index or the fulltext field has no
 *         <code>TermPositionVector</code>.
 */
public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException {
    IndexReader reader = index.getIndexReader();
    try {
        checkRewritten(reader);
        Term idTerm = TermFactory.createUUIDTerm(id.toString());
        TermDocs tDocs = reader.termDocs(idTerm);
        int docNumber;
        Document doc;
        try {
            if (tDocs.next()) {
                docNumber = tDocs.doc();
                doc = reader.document(docNumber);
            } else {
                // node not found in index
                return null;
            }
        } finally {
            tDocs.close();
        }
        Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT);
        if (fields.length == 0) {
            log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName());
            SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
            exProvider.init(query, index);
            return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
        }
        // method-local buffer, no synchronization required
        StringBuilder text = new StringBuilder();
        String separator = "";
        for (int i = 0; i < fields.length; i++) {
            String value = fields[i].stringValue();
            // stringValue() may be null for fields without a string value;
            // treat those like empty values instead of failing with an NPE
            if (value == null || value.length() == 0) {
                continue;
            }
            text.append(separator);
            text.append(value);
            // every value after the first appended one is preceded by a space
            separator = " ";
        }
        TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT);
        if (tfv instanceof TermPositionVector) {
            return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize);
        } else {
            log.debug("No TermPositionVector on Fulltext field.");
            return null;
        }
    } finally {
        Util.closeOrRelease(reader);
    }
}
Aggregations