Use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.
From the class Posting, method invertDocument.
/**
 * Tokenizes the fields of a document into Postings.
 *
 * For every indexed field, each term is handed to {@code addPosition} and the
 * running per-field length, position and boost (indexed by field number) are
 * updated. Fields that are not indexed are skipped.
 *
 * @param doc the document whose fields are inverted
 * @throws IOException propagated from the token stream
 * @throws IllegalArgumentException if a tokenized field has neither a Reader
 *         nor a String value
 */
private final void invertDocument(Document doc) throws IOException {
  for (Enumeration<?> e = doc.fields(); e.hasMoreElements();) {
    Field field = (Field) e.nextElement();
    String fieldName = field.name();
    int fieldNumber = fieldInfos.fieldNumber(fieldName);

    // Resume from the length/position accumulated so far for this field number.
    int length = fieldLengths[fieldNumber];
    int position = fieldPositions[fieldNumber];

    if (!field.isIndexed()) {
      continue;
    }

    if (field.isTokenized()) {
      // Find or make a Reader: prefer the field's own Reader, fall back to its String value.
      Reader reader = field.readerValue();
      if (reader == null) {
        String text = field.stringValue();
        if (text == null) {
          throw new IllegalArgumentException("field must have either String or Reader value");
        }
        reader = new StringReader(text);
      }

      // Tokenize the field and add each token to the posting table.
      TokenStream stream = analyzer.tokenStream(fieldName, reader);
      try {
        Token token;
        while ((token = stream.next()) != null) {
          // An increment of 1 leaves the position unchanged; other values shift it.
          position += token.getPositionIncrement() - 1;
          String type = token.type();
          addPosition(fieldName, token.termText(), type != null ? type : "*", position);
          position++;
          length++;
          // Stop once the field has exceeded the configured maximum length.
          if (length > maxFieldLength) {
            break;
          }
        }
      } finally {
        stream.close();
      }
    } else {
      // Un-tokenized field: the whole string value is a single term.
      addPosition(fieldName, field.stringValue(), "Field", position);
      position++;
      length++;
    }

    // Save the updated length and position, and fold in this field's boost.
    fieldLengths[fieldNumber] = length;
    fieldPositions[fieldNumber] = position;
    fieldBoosts[fieldNumber] *= field.getBoost();
  }
}
Use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.
From the class FieldInfos, method add.
/**
 * Registers field info for every field of the given document.
 *
 * Each field is passed on to {@code add(name, isIndexed, isTermVectorStored)}.
 *
 * @param doc the document whose fields are added
 */
public void add(Document doc) {
  for (Enumeration<?> e = doc.fields(); e.hasMoreElements();) {
    Field current = (Field) e.nextElement();
    String name = current.name();
    boolean indexed = current.isIndexed();
    boolean termVectorStored = current.isTermVectorStored();
    add(name, indexed, termVectorStored);
  }
}
Use of gate.creole.annic.apache.lucene.document.Field in project gate-core by GateNLP.
From the class FieldsWriter, method addDocument.
/**
 * Writes the stored fields of a document.
 *
 * The current file pointer of the fields stream is written to the index
 * stream first. The fields stream then receives the number of stored fields,
 * followed, for each stored field, by its field number, a flag byte (bit 0
 * set when the field is tokenized) and its string value.
 *
 * @param doc the document whose stored fields are written
 * @throws IOException if writing to either stream fails
 */
final void addDocument(Document doc) throws IOException {
  indexStream.writeLong(fieldsStream.getFilePointer());

  // First pass: count the stored fields, since the count precedes the field data.
  int numStored = 0;
  for (Enumeration<?> e = doc.fields(); e.hasMoreElements();) {
    if (((Field) e.nextElement()).isStored()) {
      numStored++;
    }
  }
  fieldsStream.writeVInt(numStored);

  // Second pass: write out each stored field.
  for (Enumeration<?> e = doc.fields(); e.hasMoreElements();) {
    Field current = (Field) e.nextElement();
    if (!current.isStored()) {
      continue;
    }
    fieldsStream.writeVInt(fieldInfos.fieldNumber(current.name()));
    byte flags = current.isTokenized() ? (byte) 1 : (byte) 0;
    fieldsStream.writeByte(flags);
    fieldsStream.writeString(current.stringValue());
  }
}
Aggregations