Examples with TextField - org.apache.lucene.document.TextField

Example 36 with TextField

Use of org.apache.lucene.document.TextField in project gitblit by gitblit.

In class TicketIndexer, method ticketToDoc:

/**
 * Builds the Lucene document that represents a single ticket in the index.
 *
 * <p>Every ticket attribute is written through {@code toDocField}; the newest
 * patchset (if any) is stored as one colon-separated summary field, and the
 * ticket's indexable string is added as an unstored text field.
 *
 * @param ticket
 *            the ticket to convert
 * @return a Lucene document
 */
private Document ticketToDoc(TicketModel ticket) {
    final Document document = new Document();

    // repository and document ids for Lucene querying
    toDocField(document, Lucene.rid, StringUtils.getSHA1(ticket.repository));
    toDocField(document, Lucene.did, StringUtils.getSHA1(ticket.repository + ticket.number));

    // plain ticket attributes
    toDocField(document, Lucene.project, ticket.project);
    toDocField(document, Lucene.repository, ticket.repository);
    toDocField(document, Lucene.number, ticket.number);
    toDocField(document, Lucene.title, ticket.title);
    toDocField(document, Lucene.body, ticket.body);
    toDocField(document, Lucene.created, ticket.created);
    toDocField(document, Lucene.createdby, ticket.createdBy);
    toDocField(document, Lucene.updated, ticket.updated);
    toDocField(document, Lucene.updatedby, ticket.updatedBy);
    toDocField(document, Lucene.responsible, ticket.responsible);
    toDocField(document, Lucene.milestone, ticket.milestone);
    toDocField(document, Lucene.topic, ticket.topic);
    toDocField(document, Lucene.status, ticket.status.name());
    toDocField(document, Lucene.comments, ticket.getComments().size());
    // the type is the only enum-like attribute that is null-guarded here
    toDocField(document, Lucene.type, ticket.type == null ? null : ticket.type.name());
    toDocField(document, Lucene.mergesha, ticket.mergeSha);
    toDocField(document, Lucene.mergeto, ticket.mergeTo);

    // multi-valued attributes are flattened with ';' and lower-cased
    toDocField(document, Lucene.labels,
            StringUtils.flattenStrings(ticket.getLabels(), ";").toLowerCase());
    toDocField(document, Lucene.participants,
            StringUtils.flattenStrings(ticket.getParticipants(), ";").toLowerCase());
    toDocField(document, Lucene.watchedby,
            StringUtils.flattenStrings(ticket.getWatchers(), ";").toLowerCase());
    toDocField(document, Lucene.mentions,
            StringUtils.flattenStrings(ticket.getMentions(), ";").toLowerCase());

    toDocField(document, Lucene.votes, ticket.getVoters().size());
    toDocField(document, Lucene.priority, ticket.priority.getValue());
    toDocField(document, Lucene.severity, ticket.severity.getValue());

    // attachment names are lower-cased individually, then flattened
    List<String> attachmentNames = new ArrayList<String>();
    for (Attachment each : ticket.getAttachments()) {
        attachmentNames.add(each.name.toLowerCase());
    }
    toDocField(document, Lucene.attachments, StringUtils.flattenStrings(attachmentNames, ";"));

    // only the most recent patchset is summarised in the index
    List<Patchset> patchsets = ticket.getPatchsets();
    final int patchsetCount = patchsets.size();
    if (patchsetCount > 0) {
        toDocField(document, Lucene.patchsets, patchsetCount);
        Patchset latest = patchsets.get(patchsetCount - 1);
        String summary = latest.number
                + ":" + latest.rev
                + ":" + latest.tip
                + ":" + latest.base
                + ":" + latest.commits;
        document.add(new org.apache.lucene.document.Field(Lucene.patchset.name(), summary, TextField.TYPE_STORED));
    }

    // searchable, unstored representation of the whole ticket
    document.add(new TextField(Lucene.content.name(), ticket.toIndexableString(), Store.NO));
    return document;
}

Also used : ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) Attachment(com.gitblit.models.TicketModel.Attachment) Patchset(com.gitblit.models.TicketModel.Patchset) Document(org.apache.lucene.document.Document)

Example 37 with TextField

Use of org.apache.lucene.document.TextField in project lucene-solr by apache.

In class TestEmptyTokenStream, method testIndexWriter_LUCENE4656:

/**
 * Regression test for LUCENE-4656: indexing a document whose text field is backed by an
 * {@code EmptyTokenStream} (which has no {@code TermToBytesRefAttribute}) must succeed and
 * the document must be counted by the writer.
 *
 * <p>The directory and the writer are managed by try-with-resources so that both are closed
 * even when an assertion fails or {@code addDocument} throws; resources are closed in
 * reverse order (writer first, then directory), matching the original explicit calls.
 *
 * @throws IOException if creating, writing to, or closing the index fails
 */
public void testIndexWriter_LUCENE4656() throws IOException {
    try (Directory directory = newDirectory();
            IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null))) {
        TokenStream ts = new EmptyTokenStream();
        // precondition of the scenario: the stream exposes no term attribute at all
        assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
        Document doc = new Document();
        doc.add(new StringField("id", "0", Field.Store.YES));
        doc.add(new TextField("description", ts));
        // this should not fail because we have no TermToBytesRefAttribute
        writer.addDocument(doc);
        assertEquals(1, writer.numDocs());
    }
}

Also used : TokenStream(org.apache.lucene.analysis.TokenStream) IndexWriter(org.apache.lucene.index.IndexWriter) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 38 with TextField

Use of org.apache.lucene.document.TextField in project lucene-solr by apache.

In class TestBackwardsCompatibility, method addDoc:

/**
 * Adds a single document derived from {@code id} to the given writer.
 *
 * <p>The document carries a broad mix of field kinds: an unstored text field, a stored id
 * string field, stored text fields with term vectors (including non-ASCII values and a
 * non-ASCII field name), doc-values fields, point fields in one and two dimensions, and two
 * fields with explicitly chosen index options.
 *
 * @param writer the index writer that receives the document
 * @param id     the value used for the id field and as the seed for the numeric and binary fields
 * @throws IOException declared by this method; presumably raised when the writer fails to add the document
 */
private void addDoc(IndexWriter writer, int id) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("content", "aaa", Field.Store.NO));
    doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
    // stored text type with term vectors, including positions and offsets
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.setStoreTermVectors(true);
    customType2.setStoreTermVectorPositions(true);
    customType2.setStoreTermVectorOffsets(true);
    // same non-ASCII value under two field names
    doc.add(new Field("autf8", "Lu𝄞ce𝅘𝅥𝅮ne  ☠ ab񕰗cd", customType2));
    doc.add(new Field("utf8", "Lu𝄞ce𝅘𝅥𝅮ne  ☠ ab񕰗cd", customType2));
    doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
    // the field NAME itself contains a non-ASCII character
    doc.add(new Field("fieⱷld", "field with non-ascii name", customType2));
    // add docvalues fields
    // (byte) id keeps only the low 8 bits of id
    doc.add(new NumericDocValuesField("dvByte", (byte) id));
    // id split into 4 bytes, most significant byte first
    byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
    BytesRef ref = new BytesRef(bytes);
    doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
    doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
    doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
    doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
    doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
    doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
    doc.add(new DoubleDocValuesField("dvDouble", (double) id));
    doc.add(new FloatDocValuesField("dvFloat", (float) id));
    doc.add(new NumericDocValuesField("dvInt", id));
    doc.add(new NumericDocValuesField("dvLong", id));
    doc.add(new NumericDocValuesField("dvPacked", id));
    // (short) id keeps only the low 16 bits of id
    doc.add(new NumericDocValuesField("dvShort", (short) id));
    doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
    doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
    // point fields, one- and two-dimensional; the second dimension is always 2 * id
    doc.add(new IntPoint("intPoint1d", id));
    doc.add(new IntPoint("intPoint2d", id, 2 * id));
    doc.add(new FloatPoint("floatPoint1d", (float) id));
    // the cast binds to the literal 2, so the product is computed in float
    doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
    doc.add(new LongPoint("longPoint1d", id));
    // NOTE(review): 2 * id is evaluated in int arithmetic before it is passed on
    doc.add(new LongPoint("longPoint2d", id, 2 * id));
    doc.add(new DoublePoint("doublePoint1d", (double) id));
    // the cast binds to the literal 2, so the product is computed in double
    doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
    doc.add(new BinaryPoint("binaryPoint1d", bytes));
    doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
    // a field with both offsets and term vectors for a cross-check
    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
    customType3.setStoreTermVectors(true);
    customType3.setStoreTermVectorPositions(true);
    customType3.setStoreTermVectorOffsets(true);
    customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
    // a field that omits only positions
    // (index options stop at DOCS_AND_FREQS; term vectors keep offsets but not positions)
    FieldType customType4 = new FieldType(TextField.TYPE_STORED);
    customType4.setStoreTermVectors(true);
    customType4.setStoreTermVectorPositions(false);
    customType4.setStoreTermVectorOffsets(true);
    customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
    // TODO:
    //   index different norms types via similarity (we use a random one currently?!)
    //   remove any analyzer randomness, explicitly add payloads for certain fields.
    writer.addDocument(doc);
}

Also used : BinaryPoint(org.apache.lucene.document.BinaryPoint) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) FieldType(org.apache.lucene.document.FieldType) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) DoublePoint(org.apache.lucene.document.DoublePoint) BytesRef(org.apache.lucene.util.BytesRef)

Example 39 with TextField

Use of org.apache.lucene.document.TextField in project lucene-solr by apache.

In class TestIndexWriter, method testStopwordsPosIncHole:

// LUCENE-3849
/**
 * Indexes two values of the same "body" field through an analyzer that applies
 * {@code MockTokenFilter} with the English stop set, then checks that the phrase
 * query {@code body:"just ? test"} (terms at relative positions 0 and 2) matches
 * exactly one document.
 *
 * <p>Presumably the trailing "a" of the first value is removed as a stop word and
 * the resulting position hole must be preserved across the two field instances.
 */
public void testStopwordsPosIncHole() throws Exception {
    Directory directory = newDirectory();
    Analyzer stopFilteringAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer();
            TokenStream filtered = new MockTokenFilter(source, MockTokenFilter.ENGLISH_STOPSET);
            return new TokenStreamComponents(source, filtered);
        }
    };
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, stopFilteringAnalyzer);

    // one document, two instances of the same field
    Document document = new Document();
    document.add(new TextField("body", "just a", Field.Store.NO));
    document.add(new TextField("body", "test of gaps", Field.Store.NO));
    writer.addDocument(document);

    // obtain the reader first, then close the writer before searching
    IndexReader reader = writer.getReader();
    writer.close();

    // body:"just ? test"
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term("body", "just"), 0);
    phrase.add(new Term("body", "test"), 2);
    PhraseQuery query = phrase.build();

    IndexSearcher searcher = newSearcher(reader);
    assertEquals(1, searcher.search(query, 5).totalHits);

    reader.close();
    directory.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) MockTokenFilter(org.apache.lucene.analysis.MockTokenFilter) PhraseQuery(org.apache.lucene.search.PhraseQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TextField(org.apache.lucene.document.TextField) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory)

Example 40 with TextField

Use of org.apache.lucene.document.TextField in project lucene-solr by apache.

In class TestIndexWriter, method testNegativePositions:

// LUCENE-1255
/**
 * Verifies that {@code IndexWriter.addDocument} rejects, with an
 * {@code IllegalArgumentException}, a token stream whose first token has a position
 * increment of 0.
 *
 * <p>The hand-written stream emits the terms "a", "b", "c"; only the first one gets
 * increment 0, the rest get 1. Presumably a leading increment of 0 would place the
 * first token at position -1, which is what the writer refuses.
 *
 * <p>The directory and writer are managed by try-with-resources so they are closed
 * even when {@code expectThrows} fails because no exception was raised.
 *
 * @throws Throwable if index setup or teardown fails, or the expectation is not met
 */
public void testNegativePositions() throws Throwable {
    final TokenStream tokens = new TokenStream() {

        final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

        final Iterator<String> terms = Arrays.asList("a", "b", "c").iterator();

        // true only until the first token has been emitted
        boolean first = true;

        @Override
        public boolean incrementToken() {
            if (!terms.hasNext()) {
                return false;
            }
            clearAttributes();
            termAtt.append(terms.next());
            // the very first token deliberately carries increment 0
            posIncrAtt.setPositionIncrement(first ? 0 : 1);
            first = false;
            return true;
        }
    };
    try (Directory dir = newDirectory();
            IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        Document doc = new Document();
        doc.add(new TextField("field", tokens));
        expectThrows(IllegalArgumentException.class, () -> {
            w.addDocument(doc);
        });
    }
}

Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Iterator(java.util.Iterator) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory)

Aggregations

TextField (org.apache.lucene.document.TextField): 192; Document (org.apache.lucene.document.Document): 171; Directory (org.apache.lucene.store.Directory): 99; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 61; Term (org.apache.lucene.index.Term): 61; IndexWriter (org.apache.lucene.index.IndexWriter): 58; IndexSearcher (org.apache.lucene.search.IndexSearcher): 55; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 52; Field (org.apache.lucene.document.Field): 50; StringField (org.apache.lucene.document.StringField): 48; BytesRef (org.apache.lucene.util.BytesRef): 48; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 44; IndexReader (org.apache.lucene.index.IndexReader): 43; TermQuery (org.apache.lucene.search.TermQuery): 41; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 31; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 30; TopDocs (org.apache.lucene.search.TopDocs): 29; RAMDirectory (org.apache.lucene.store.RAMDirectory): 29; FieldType (org.apache.lucene.document.FieldType): 23; Query (org.apache.lucene.search.Query): 23