Use of org.apache.lucene.index.IndexableField in project lucene-solr by Apache.
In class ReadTokensTask, method doLogic:
/**
 * Runs every indexed, tokenized field of the task's current {@code doc} through the
 * analyzer obtained from the run data and consumes all resulting tokens.
 *
 * <p>The per-document count is also added to {@code totalTokenCount}.
 *
 * @return the number of tokens produced for this document
 * @throws Exception if creating or consuming a token stream fails
 */
@Override
public int doLogic() throws Exception {
    List<IndexableField> fields = doc.getFields();
    Analyzer analyzer = getRunData().getAnalyzer();
    int tokenCount = 0;
    for (final IndexableField field : fields) {
        // Skip fields that are not indexed or not tokenized.
        if (field.fieldType().indexOptions() == IndexOptions.NONE || !field.fieldType().tokenized()) {
            continue;
        }
        // try-with-resources guarantees close() even when reset(), incrementToken()
        // or end() throws; previously the stream was only closed on the success path.
        try (TokenStream stream = field.tokenStream(analyzer, null)) {
            // reset the TokenStream to the first token
            stream.reset();
            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
            while (stream.incrementToken()) {
                // Fetch the term bytes for each token; the value itself is not used.
                termAtt.getBytesRef();
                tokenCount++;
            }
            stream.end();
        }
    }
    totalTokenCount += tokenCount;
    return tokenCount;
}
Use of org.apache.lucene.index.IndexableField in project jackrabbit-oak by Apache.
In class LucenePropertyIndex, method getExcerpt:
/**
 * Builds an excerpt for the given hit.
 *
 * <p>First tries {@code postingsHighlighter} on the document's fields whose name starts with
 * {@code ANALYZED_FIELD_PREFIX} (only when {@code fieldInfos} reports both positions and
 * offsets). If that yields nothing, falls back to {@code highlighter} on the first full-text
 * or analyzed field that produces at least one fragment. Pieces are joined with "...".
 *
 * @param query      the query whose matches should be highlighted
 * @param analyzer   analyzer used to re-tokenize stored text in the fallback path
 * @param searcher   searcher providing the index reader the document is loaded from
 * @param doc        the hit to build the excerpt for
 * @param fieldInfos field infos consulted for positions/offsets availability
 * @return the excerpt, or an empty string when nothing could be highlighted
 * @throws IOException if reading the document or tokenizing its text fails
 */
private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc, FieldInfos fieldInfos) throws IOException {
    StringBuilder excerpt = new StringBuilder();
    int docID = doc.doc;
    // Load the stored fields once; both the postings pass and the fallback read the same document.
    List<IndexableField> storedFields = searcher.getIndexReader().document(docID).getFields();
    List<String> names = new LinkedList<String>();
    // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored.
    // The availability check does not depend on the field being visited, so evaluate it once.
    if (fieldInfos.hasProx() && fieldInfos.hasOffsets()) {
        for (IndexableField field : storedFields) {
            String name = field.name();
            if (name.startsWith(ANALYZED_FIELD_PREFIX)) {
                names.add(name);
            }
        }
    }
    if (!names.isEmpty()) {
        // one passage per field
        int[] maxPassages = new int[names.size()];
        Arrays.fill(maxPassages, 1);
        try {
            Map<String, String[]> stringMap = postingsHighlighter.highlightFields(names.toArray(new String[names.size()]), query, searcher, new int[] { docID }, maxPassages);
            for (Map.Entry<String, String[]> entry : stringMap.entrySet()) {
                String value = Arrays.toString(entry.getValue());
                // keep only results that actually contain a highlighted match
                if (value.contains("<b>")) {
                    if (excerpt.length() > 0) {
                        excerpt.append("...");
                    }
                    excerpt.append(value);
                }
            }
        } catch (Exception e) {
            // best effort: log and let the fallback below try
            LOG.error("postings highlighting failed", e);
        }
    }
    // fallback if no excerpt could be retrieved using postings highlighter
    if (excerpt.length() == 0) {
        for (IndexableField field : storedFields) {
            String name = field.name();
            // only full text or analyzed fields
            if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                String text = field.stringValue();
                TokenStream tokenStream = analyzer.tokenStream(name, text);
                try {
                    TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1);
                    if (textFragments != null && textFragments.length > 0) {
                        for (TextFragment fragment : textFragments) {
                            if (excerpt.length() > 0) {
                                excerpt.append("...");
                            }
                            excerpt.append(fragment.toString());
                        }
                        // stop at the first field that produced fragments
                        break;
                    }
                } catch (InvalidTokenOffsetsException e) {
                    LOG.error("higlighting failed", e);
                }
            }
        }
    }
    return excerpt.toString();
}
Use of org.apache.lucene.index.IndexableField in project nifi by Apache.
In class DocsReader, method getByteOffset:
/**
 * Resolves the byte offset recorded for the given document.
 *
 * <p>When the document carries a {@code FieldNames.BLOCK_INDEX} field, its integer value is
 * translated to an offset through the reader's TOC reader. Otherwise the value of the
 * {@code FieldNames.STORAGE_FILE_OFFSET} field is returned directly.
 *
 * @param d      the document to inspect
 * @param reader the record reader whose TOC reader maps block indexes to offsets
 * @return the byte offset for the document
 */
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockIndexField = d.getField(FieldNames.BLOCK_INDEX);
    if (blockIndexField == null) {
        // No block index on this document: use the explicit file offset field.
        final IndexableField offsetField = d.getField(FieldNames.STORAGE_FILE_OFFSET);
        return offsetField.numericValue().longValue();
    }

    // Read the index before touching the TOC reader, then look the offset up.
    final int tocBlock = blockIndexField.numericValue().intValue();
    return reader.getTocReader().getBlockOffset(tocBlock);
}
Use of org.apache.lucene.index.IndexableField in project ddf by Codice.
In class TestGeoNamesQueryLuceneIndex, method createDocumentFromGeoEntry:
/**
 * Creates a document for the given entry.
 *
 * <p>The document receives, in order: name, latitude, longitude, feature code, population
 * (stored field and doc-values field), country code and alternate names (not stored),
 * followed by the fields {@code strategy} creates for the entry's longitude/latitude point.
 *
 * @param geoEntry the entry to convert
 * @return the populated document
 */
private Document createDocumentFromGeoEntry(final GeoEntry geoEntry) {
    // Fixed per-entry fields, listed in the order they are added to the document.
    final IndexableField[] entryFields = {
        new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES),
        new StoredField(GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()),
        new StoredField(GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()),
        new StringField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES),
        new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()),
        new NumericDocValuesField(GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()),
        new StringField(GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES),
        new TextField(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO)
    };

    final Document result = new Document();
    for (IndexableField entryField : entryFields) {
        result.add(entryField);
    }

    // Spatial fields derived from the entry's (longitude, latitude) point.
    final Shape location = SPATIAL_CONTEXT.getShapeFactory().pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField spatialField : strategy.createIndexableFields(location)) {
        result.add(spatialField);
    }
    return result;
}
Use of org.apache.lucene.index.IndexableField in project ddf by Codice.
In class GeoNamesLuceneIndexer, method addDocument:
/**
 * Converts the given entry into a document and adds it to the index writer.
 *
 * <p>The document receives, in order: name, latitude, longitude, feature code, country code,
 * population (stored field and doc-values field) and alternate names (not stored), then the
 * fields {@code strategy} creates for the entry's longitude/latitude point, and finally a
 * float doc-values field holding the result of {@code calculateBoost}.
 *
 * @param indexWriter the writer the document is added to
 * @param geoEntry    the entry to index
 * @param strategy    the spatial strategy producing the point's indexable fields
 * @throws IOException if the writer fails to add the document
 */
private void addDocument(final IndexWriter indexWriter, final GeoEntry geoEntry, final SpatialStrategy strategy) throws IOException {
    // Fixed per-entry fields, listed in the order they are added to the document.
    final IndexableField[] entryFields = {
        new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES),
        new StoredField(GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude()),
        new StoredField(GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude()),
        new StringField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode(), Field.Store.YES),
        new TextField(GeoNamesLuceneConstants.COUNTRY_CODE_FIELD, geoEntry.getCountryCode(), Field.Store.YES),
        new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()),
        // This DocValues field is used for sorting by population.
        new NumericDocValuesField(GeoNamesLuceneConstants.POPULATION_DOCVALUES_FIELD, geoEntry.getPopulation()),
        new TextField(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, geoEntry.getAlternateNames(), Field.Store.NO)
    };

    final Document entryDoc = new Document();
    for (IndexableField entryField : entryFields) {
        entryDoc.add(entryField);
    }

    // Add each entry's spatial information for fast spatial filtering.
    final Shape location = SPATIAL_CONTEXT.getShapeFactory().pointXY(geoEntry.getLongitude(), geoEntry.getLatitude());
    for (IndexableField spatialField : strategy.createIndexableFields(location)) {
        entryDoc.add(spatialField);
    }

    // The boost is computed after the spatial fields and added as the last field.
    entryDoc.add(new FloatDocValuesField(GeoNamesLuceneConstants.BOOST_FIELD, calculateBoost(geoEntry)));
    indexWriter.addDocument(entryDoc);
}
Aggregations