Search in sources :

Example 1 with Field

use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.

the class Posting method invertDocument.

/**
 * Tokenizes the fields of a document into Postings.
 *
 * <p>For every field the running length and position stored under its field
 * number are read first. Fields that are not indexed are then skipped.
 * An indexed, un-tokenized field contributes a single posting of type
 * {@code "Field"} holding its string value. An indexed, tokenized field is
 * run through the analyzer, reading from the field's Reader when present and
 * otherwise from its string value; each token is added with its own type, or
 * {@code "*"} when the token has no type. Token reading stops once the
 * running length exceeds {@code maxFieldLength}. Afterwards the updated
 * length and position are stored back and the field's boost is multiplied
 * into {@code fieldBoosts}.
 *
 * @param doc the document whose fields are inverted
 * @throws IOException declared by the method; presumably raised by the
 *         token stream while reading or closing
 * @throws IllegalArgumentException if a tokenized field has neither a Reader
 *         nor a String value
 */
private final void invertDocument(Document doc) throws IOException {
    for (Enumeration fieldEnum = doc.fields(); fieldEnum.hasMoreElements();) {
        Field field = (Field) fieldEnum.nextElement();
        String fieldName = field.name();
        int fieldNumber = fieldInfos.fieldNumber(fieldName);
        // Resume from the totals already recorded under this field number.
        int length = fieldLengths[fieldNumber];
        int position = fieldPositions[fieldNumber];

        if (!field.isIndexed()) {
            continue;
        }

        if (field.isTokenized()) {
            // Prefer the field's own Reader; fall back to wrapping its string value.
            Reader source = field.readerValue();
            if (source == null) {
                String text = field.stringValue();
                if (text == null) {
                    throw new IllegalArgumentException("field must have either String or Reader value");
                }
                source = new StringReader(text);
            }

            TokenStream tokens = analyzer.tokenStream(fieldName, source);
            try {
                Token token;
                while ((token = tokens.next()) != null) {
                    // An increment of 1 leaves the position unchanged here;
                    // the post-add increment below supplies the normal step.
                    position += token.getPositionIncrement() - 1;
                    String type = token.type();
                    if (type == null) {
                        type = "*";
                    }
                    addPosition(fieldName, token.termText(), type, position);
                    position++;
                    length++;
                    if (length > maxFieldLength) {
                        break;
                    }
                }
            } finally {
                tokens.close();
            }
        } else {
            // Un-tokenized: the whole string value is a single term.
            addPosition(fieldName, field.stringValue(), "Field", position);
            position++;
            length++;
        }

        // Persist the running totals for this field number.
        fieldLengths[fieldNumber] = length;
        fieldPositions[fieldNumber] = position;
        fieldBoosts[fieldNumber] *= field.getBoost();
    }
}
Also used : Field(gate.creole.annic.apache.lucene.document.Field) TokenStream(gate.creole.annic.apache.lucene.analysis.TokenStream) Enumeration(java.util.Enumeration) Reader(java.io.Reader) StringReader(java.io.StringReader) Token(gate.creole.annic.apache.lucene.analysis.Token)

Example 2 with Field

use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.

the class FieldInfos method add.

/**
 * Registers every field of the given document by delegating to
 * {@code add(name, isIndexed, isTermVectorStored)} once per field.
 *
 * @param doc the document whose fields are added
 */
public void add(Document doc) {
    for (Enumeration fieldEnum = doc.fields(); fieldEnum.hasMoreElements();) {
        Field current = (Field) fieldEnum.nextElement();
        String name = current.name();
        boolean indexed = current.isIndexed();
        boolean termVectorStored = current.isTermVectorStored();
        add(name, indexed, termVectorStored);
    }
}
Also used : Field(gate.creole.annic.apache.lucene.document.Field) Enumeration(java.util.Enumeration)

Example 3 with Field

use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.

the class FieldsWriter method addDocument.

/**
 * Writes the stored fields of a document.
 *
 * <p>The current file pointer of {@code fieldsStream} is first written to
 * {@code indexStream}. The number of stored fields is then written to
 * {@code fieldsStream}, followed, for each stored field, by its field
 * number, a flag byte (bit 0 set when the field is tokenized) and its
 * string value.
 *
 * @param doc the document whose stored fields are written
 * @throws IOException declared by the method; presumably raised by the
 *         underlying stream writes
 */
final void addDocument(Document doc) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    // First pass: the stored-field count is written ahead of the field data.
    int storedCount = 0;
    for (Enumeration pass = doc.fields(); pass.hasMoreElements();) {
        Field candidate = (Field) pass.nextElement();
        if (candidate.isStored()) {
            storedCount++;
        }
    }
    fieldsStream.writeVInt(storedCount);

    // Second pass: emit each stored field.
    for (Enumeration pass = doc.fields(); pass.hasMoreElements();) {
        Field stored = (Field) pass.nextElement();
        if (!stored.isStored()) {
            continue;
        }
        fieldsStream.writeVInt(fieldInfos.fieldNumber(stored.name()));
        byte bits = stored.isTokenized() ? (byte) 1 : (byte) 0;
        fieldsStream.writeByte(bits);
        fieldsStream.writeString(stored.stringValue());
    }
}
Also used : Field(gate.creole.annic.apache.lucene.document.Field) Enumeration(java.util.Enumeration)

Aggregations

Field (gate.creole.annic.apache.lucene.document.Field): 3, Enumeration (java.util.Enumeration): 3, Token (gate.creole.annic.apache.lucene.analysis.Token): 1, TokenStream (gate.creole.annic.apache.lucene.analysis.TokenStream): 1, Reader (java.io.Reader): 1, StringReader (java.io.StringReader): 1