use of org.apache.lucene.document.Document in project camel by apache.
the class LuceneSearcher method search.
public Hits search(String searchPhrase, int maxNumberOfHits, Version luceneVersion, boolean returnLuceneDocs) throws Exception {
    Hits searchHits = new Hits();
    int numberOfHits = doSearch(searchPhrase, maxNumberOfHits, luceneVersion);
    searchHits.setNumberOfHits(numberOfHits);
    // 'hits' is a ScoreDoc[] field on this class, populated by doSearch above
    for (ScoreDoc hit : hits) {
        Document selectedDocument = indexSearcher.doc(hit.doc);
        Hit aHit = new Hit();
        if (returnLuceneDocs) {
            aHit.setDocument(selectedDocument);
        }
        aHit.setHitLocation(hit.doc);
        aHit.setScore(hit.score);
        aHit.setData(selectedDocument.get("contents"));
        searchHits.getHit().add(aHit);
    }
    return searchHits;
}
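The loop above reads two members that are not shown in this snippet: the indexSearcher field and the ScoreDoc[] field named hits that doSearch fills in. A minimal sketch of such a doSearch, assuming a plain QueryParser over the stored "contents" field (the actual Camel implementation may differ):
// Hypothetical sketch: parse the phrase against "contents", run the search and
// keep the ScoreDoc[] so the loop in search() can iterate it.
private int doSearch(String searchPhrase, int maxNumberOfHits, Version luceneVersion) throws Exception {
    QueryParser parser = new QueryParser(luceneVersion, "contents", new StandardAnalyzer(luceneVersion));
    Query query = parser.parse(searchPhrase);
    TopDocs topDocs = indexSearcher.search(query, maxNumberOfHits);
    hits = topDocs.scoreDocs;
    return topDocs.totalHits;
}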
use of org.apache.lucene.document.Document in project zeppelin by apache.
the class LuceneSearch method doSearch.
private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query, Analyzer analyzer, Highlighter highlighter) {
    List<Map<String, String>> matchingParagraphs = Lists.newArrayList();
    ScoreDoc[] hits;
    try {
        hits = searcher.search(query, 20).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            LOG.debug("doc={} score={}", hits[i].doc, hits[i].score);
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String path = doc.get(ID_FIELD);
            if (path != null) {
                LOG.debug((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    LOG.debug(" Title: {}", doc.get("title"));
                }
                String text = doc.get(SEARCH_FIELD_TEXT);
                String header = doc.get(SEARCH_FIELD_TITLE);
                String fragment = "";
                if (text != null) {
                    TokenStream tokenStream = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TEXT, analyzer);
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3);
                    LOG.debug(" {} fragments found for query '{}'", frag.length, query);
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            LOG.debug(" Fragment: {}", frag[j].toString());
                        }
                    }
                    fragment = (frag != null && frag.length > 0) ? frag[0].toString() : "";
                }
                if (header != null) {
                    TokenStream tokenTitle = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TITLE, analyzer);
                    TextFragment[] frgTitle = highlighter.getBestTextFragments(tokenTitle, header, true, 3);
                    header = (frgTitle != null && frgTitle.length > 0) ? frgTitle[0].toString() : "";
                } else {
                    header = "";
                }
                // path is of the form <noteId>/paragraph/<paragraphId>
                matchingParagraphs.add(ImmutableMap.of("id", path, "name", title, "snippet", fragment, "text", text, "header", header));
            } else {
                LOG.info("{}. No {} for this document", i + 1, ID_FIELD);
            }
        }
    } catch (IOException | InvalidTokenOffsetsException e) {
        LOG.error("Exception on searching for {}", query, e);
    }
    return matchingParagraphs;
}
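For context, the query, analyzer and highlighter handed to doSearch are typically built by the caller. A hedged sketch of such a caller, where the HTML formatter and the 100-character fragmenter are assumptions rather than Zeppelin's exact choices:
// Hypothetical caller wiring a parsed query to a highlighter before delegating to doSearch.
List<Map<String, String>> query(IndexSearcher searcher, String queryStr) throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Query query = new QueryParser(SEARCH_FIELD_TEXT, analyzer).parse(queryStr);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    return doSearch(searcher, query, analyzer, highlighter);
}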
use of org.apache.lucene.document.Document in project zeppelin by apache.
the class LuceneSearch method updateDoc.
/**
 * Updates the index for the given note: either the note name or a paragraph.
 * If the paragraph is <code>null</code>, only the note name is updated.
 *
 * @param noteId id of the note
 * @param noteName name of the note
 * @param p paragraph, may be <code>null</code>
 * @throws IOException
 */
private void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
    String id = formatId(noteId, p);
    Document doc = newDocument(id, noteName, p);
    try {
        writer.updateDocument(new Term(ID_FIELD, id), doc);
        writer.commit();
    } catch (IOException e) {
        LOG.error("Failed to update index of notebook {}", noteId, e);
    }
}
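The writer field used here is an IndexWriter created elsewhere in the class; updateDocument atomically deletes any existing document whose ID_FIELD term matches and adds the new one, so repeated updates for the same id do not accumulate duplicates. A minimal sketch of the writer setup, where the in-memory directory and the StandardAnalyzer are assumptions:
// Hypothetical writer setup; Zeppelin's actual directory and analyzer choices may differ.
Directory directory = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
IndexWriter writer = new IndexWriter(directory, config);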
use of org.apache.lucene.document.Document in project zeppelin by apache.
the class LuceneSearch method newDocument.
/**
 * If the paragraph is not null, indexes the code in the paragraph; otherwise
 * indexes the notebook name.
 *
 * @param id id of the document, different for a note name and for a paragraph
 * @param noteName name of the note
 * @param p paragraph, may be <code>null</code>
 * @return the assembled Lucene document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
    Document doc = new Document();
    Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
    doc.add(pathField);
    doc.add(new StringField("title", noteName, Field.Store.YES));
    if (null != p) {
        doc.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES));
        if (p.getTitle() != null) {
            doc.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES));
        }
        Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
        doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
    } else {
        doc.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES));
    }
    return doc;
}
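Because the id, title and text fields are added with Field.Store.YES, they can be read back from a hit later, while the "modified" LongField is Store.NO and so is only queryable, not retrievable. A short sketch of reading the stored fields back, assuming an IndexSearcher named searcher over the same index:
// Look the document up again by its id term and read the stored fields.
TopDocs topDocs = searcher.search(new TermQuery(new Term(ID_FIELD, id)), 1);
if (topDocs.scoreDocs.length > 0) {
    Document found = searcher.doc(topDocs.scoreDocs[0].doc);
    String title = found.get("title");
    String text = found.get(SEARCH_FIELD_TEXT);
    // found.get("modified") returns null: that field is indexed but not stored.
}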
use of org.apache.lucene.document.Document in project crate by crate.
the class OrderedLuceneBatchIteratorBenchmark method createLuceneBatchIterator.
@Setup
public void createLuceneBatchIterator() throws Exception {
    IndexWriter iw = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    dummyShardId = new ShardId("dummy", 1);
    columnName = "x";
    for (int i = 0; i < 10_000_000; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField(columnName, i));
        iw.addDocument(doc);
    }
    iw.commit();
    iw.forceMerge(1, true);
    indexSearcher = new IndexSearcher(DirectoryReader.open(iw, true));
    collectorContext = new CollectorContext(mock(IndexFieldDataService.class), new CollectorFieldsVisitor(0));
    fieldTypeLookup = column -> {
        IntegerFieldMapper.IntegerFieldType integerFieldType = new IntegerFieldMapper.IntegerFieldType();
        integerFieldType.setNames(new MappedFieldType.Names(column));
        return integerFieldType;
    };
    reference = new Reference(new ReferenceIdent(new TableIdent(null, "dummyTable"), columnName), RowGranularity.DOC, DataTypes.INTEGER);
    orderBy = new OrderBy(Collections.singletonList(reference), reverseFlags, nullsFirst);
}
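The NumericDocValuesField written for every document is what lets the benchmark consume the ten million rows in column order. A minimal sketch of a sorted search over such an index, where the LONG sort type follows from doc values being stored as longs:
// Sort by the doc-values column; doc values are not stored fields, so only the doc id
// comes back with each hit, not the value itself.
Sort sort = new Sort(new SortField(columnName, SortField.Type.LONG));
TopFieldDocs topFieldDocs = indexSearcher.search(new MatchAllDocsQuery(), 10, sort);
for (ScoreDoc scoreDoc : topFieldDocs.scoreDocs) {
    System.out.println("docId=" + scoreDoc.doc);
}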