Example use of org.apache.lucene.document.TextField in project gitblit (by gitblit).
From the class TicketIndexer, method ticketToDoc.
/**
 * Creates a Lucene document from a ticket.
 *
 * @param ticket
 * @return a Lucene document
 */
private Document ticketToDoc(TicketModel ticket) {
Document result = new Document();
// synthetic repository (rid) and document (did) ids used for Lucene querying
toDocField(result, Lucene.rid, StringUtils.getSHA1(ticket.repository));
toDocField(result, Lucene.did, StringUtils.getSHA1(ticket.repository + ticket.number));
// scalar ticket attributes
toDocField(result, Lucene.project, ticket.project);
toDocField(result, Lucene.repository, ticket.repository);
toDocField(result, Lucene.number, ticket.number);
toDocField(result, Lucene.title, ticket.title);
toDocField(result, Lucene.body, ticket.body);
toDocField(result, Lucene.created, ticket.created);
toDocField(result, Lucene.createdby, ticket.createdBy);
toDocField(result, Lucene.updated, ticket.updated);
toDocField(result, Lucene.updatedby, ticket.updatedBy);
toDocField(result, Lucene.responsible, ticket.responsible);
toDocField(result, Lucene.milestone, ticket.milestone);
toDocField(result, Lucene.topic, ticket.topic);
toDocField(result, Lucene.status, ticket.status.name());
toDocField(result, Lucene.comments, ticket.getComments().size());
// type may be unset on a ticket
if (ticket.type == null) {
toDocField(result, Lucene.type, null);
} else {
toDocField(result, Lucene.type, ticket.type.name());
}
toDocField(result, Lucene.mergesha, ticket.mergeSha);
toDocField(result, Lucene.mergeto, ticket.mergeTo);
// multi-valued attributes are flattened into a single lowercase ';'-delimited string
toDocField(result, Lucene.labels, StringUtils.flattenStrings(ticket.getLabels(), ";").toLowerCase());
toDocField(result, Lucene.participants, StringUtils.flattenStrings(ticket.getParticipants(), ";").toLowerCase());
toDocField(result, Lucene.watchedby, StringUtils.flattenStrings(ticket.getWatchers(), ";").toLowerCase());
toDocField(result, Lucene.mentions, StringUtils.flattenStrings(ticket.getMentions(), ";").toLowerCase());
toDocField(result, Lucene.votes, ticket.getVoters().size());
toDocField(result, Lucene.priority, ticket.priority.getValue());
toDocField(result, Lucene.severity, ticket.severity.getValue());
// attachment names, lowercased and flattened like the other multi-valued fields
List<String> attachmentNames = new ArrayList<String>();
for (Attachment attachment : ticket.getAttachments()) {
attachmentNames.add(attachment.name.toLowerCase());
}
toDocField(result, Lucene.attachments, StringUtils.flattenStrings(attachmentNames, ";"));
// only the most recent patchset is indexed, as a compact colon-joined summary
List<Patchset> patchsets = ticket.getPatchsets();
if (!patchsets.isEmpty()) {
toDocField(result, Lucene.patchsets, patchsets.size());
Patchset latest = patchsets.get(patchsets.size() - 1);
String summary = latest.number + ":" + latest.rev + ":" + latest.tip + ":" + latest.base + ":" + latest.commits;
result.add(new org.apache.lucene.document.Field(Lucene.patchset.name(), summary, TextField.TYPE_STORED));
}
// full-text content: tokenized and indexed, but not stored
result.add(new TextField(Lucene.content.name(), ticket.toIndexableString(), Store.NO));
return result;
}
Example use of org.apache.lucene.document.TextField in project lucene-solr (by apache).
From the class TestEmptyTokenStream, method testIndexWriter_LUCENE4656.
public void testIndexWriter_LUCENE4656() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
// an EmptyTokenStream carries no TermToBytesRefAttribute
TokenStream emptyStream = new EmptyTokenStream();
assertFalse(emptyStream.hasAttribute(TermToBytesRefAttribute.class));
Document d = new Document();
d.add(new StringField("id", "0", Field.Store.YES));
d.add(new TextField("description", emptyStream));
// indexing must not fail even though the stream has no TermToBytesRefAttribute
iw.addDocument(d);
assertEquals(1, iw.numDocs());
iw.close();
dir.close();
}
Example use of org.apache.lucene.document.TextField in project lucene-solr (by apache).
From the class TestBackwardsCompatibility, method addDoc.
/**
 * Adds a single document exercising a wide spread of field types (stored/indexed
 * text, term vectors, doc values, point fields, and non-ASCII field names/values)
 * so a back-compat index contains every feature worth round-tripping.
 *
 * @param writer the index writer to add the document to
 * @param id     document id; also used to derive numeric and binary field values
 * @throws IOException if the writer fails to add the document
 */
private void addDoc(IndexWriter writer, int id) throws IOException {
Document doc = new Document();
// plain indexed (not stored) text field plus a stored string id for lookup
doc.add(new TextField("content", "aaa", Field.Store.NO));
doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
// stored text with full term vectors (positions + offsets)
FieldType customType2 = new FieldType(TextField.TYPE_STORED);
customType2.setStoreTermVectors(true);
customType2.setStoreTermVectorPositions(true);
customType2.setStoreTermVectorOffsets(true);
// NOTE(review): the literals below appear mojibake/garbled in this copy (and are
// split across physical lines) — verify against the upstream source before reuse
doc.add(new Field("autf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("utf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
// non-ASCII characters in the field NAME are exercised too
doc.add(new Field("fieā±·ld", "field with non-ascii name", customType2));
// add docvalues fields
doc.add(new NumericDocValuesField("dvByte", (byte) id));
// big-endian byte decomposition of id, reused by the binary/sorted dv and point fields
byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
BytesRef ref = new BytesRef(bytes);
doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
doc.add(new DoubleDocValuesField("dvDouble", (double) id));
doc.add(new FloatDocValuesField("dvFloat", (float) id));
doc.add(new NumericDocValuesField("dvInt", id));
doc.add(new NumericDocValuesField("dvLong", id));
doc.add(new NumericDocValuesField("dvPacked", id));
doc.add(new NumericDocValuesField("dvShort", (short) id));
doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
// 1-d and 2-d point fields for each numeric point type
doc.add(new IntPoint("intPoint1d", id));
doc.add(new IntPoint("intPoint2d", id, 2 * id));
doc.add(new FloatPoint("floatPoint1d", (float) id));
doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
doc.add(new LongPoint("longPoint1d", id));
doc.add(new LongPoint("longPoint2d", id, 2 * id));
doc.add(new DoublePoint("doublePoint1d", (double) id));
doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
doc.add(new BinaryPoint("binaryPoint1d", bytes));
doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
// a field with both offsets and term vectors for a cross-check
FieldType customType3 = new FieldType(TextField.TYPE_STORED);
customType3.setStoreTermVectors(true);
customType3.setStoreTermVectorPositions(true);
customType3.setStoreTermVectorOffsets(true);
customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
// a field that omits only positions
FieldType customType4 = new FieldType(TextField.TYPE_STORED);
customType4.setStoreTermVectors(true);
customType4.setStoreTermVectorPositions(false);
customType4.setStoreTermVectorOffsets(true);
customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
// TODO:
// index different norms types via similarity (we use a random one currently?!)
// remove any analyzer randomness, explicitly add payloads for certain fields.
writer.addDocument(doc);
}
Example use of org.apache.lucene.document.TextField in project lucene-solr (by apache).
From the class TestIndexWriter, method testStopwordsPosIncHole.
// LUCENE-3849
public void testStopwordsPosIncHole() throws Exception {
Directory directory = newDirectory();
// analyzer that strips English stopwords, leaving position holes behind
Analyzer analyzer = new Analyzer() {

@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer();
TokenStream filtered = new MockTokenFilter(source, MockTokenFilter.ENGLISH_STOPSET);
return new TokenStreamComponents(source, filtered);
}
};
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);
// two values for the same field: the stopword "a" at the end of the first
// value leaves a hole before "test" in the second value
Document document = new Document();
document.add(new TextField("body", "just a", Field.Store.NO));
document.add(new TextField("body", "test of gaps", Field.Store.NO));
writer.addDocument(document);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
// body:"just ? test" — the phrase must match across the stopword hole
PhraseQuery.Builder phrase = new PhraseQuery.Builder();
phrase.add(new Term("body", "just"), 0);
phrase.add(new Term("body", "test"), 2);
PhraseQuery query = phrase.build();
assertEquals(1, searcher.search(query, 5).totalHits);
reader.close();
directory.close();
}
Example use of org.apache.lucene.document.TextField in project lucene-solr (by apache).
From the class TestIndexWriter, method testNegativePositions.
// LUCENE-1255
public void testNegativePositions() throws Throwable {
// emits "a", "b", "c" with a position increment of 0 on the very first token,
// which produces an illegal negative first position
final TokenStream stream = new TokenStream() {

final CharTermAttribute term = addAttribute(CharTermAttribute.class);

final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);

final Iterator<String> tokens = Arrays.asList("a", "b", "c").iterator();

boolean emittedFirst = false;

@Override
public boolean incrementToken() {
if (!tokens.hasNext()) {
return false;
}
clearAttributes();
term.append(tokens.next());
posIncr.setPositionIncrement(emittedFirst ? 1 : 0);
emittedFirst = true;
return true;
}
};
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document document = new Document();
document.add(new TextField("field", stream));
// the writer must reject the stream's negative first position
expectThrows(IllegalArgumentException.class, () -> {
writer.addDocument(document);
});
writer.close();
dir.close();
}
Aggregations