Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestIndexWriterMerging, method testForceMergeDeletes:
// LUCENE-325: test forceMergeDeletes, when 2 singular merges are required
public void testForceMergeDeletes() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(2)
      .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
  Document document = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);

  FieldType customType1 = new FieldType(TextField.TYPE_STORED);
  customType1.setTokenized(false);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);

  Field idField = newStringField("id", "", Field.Store.NO);
  document.add(idField);
  Field storedField = newField("stored", "stored", customType);
  document.add(storedField);
  Field termVectorField = newField("termVector", "termVector", customType1);
  document.add(termVectorField);

  for (int i = 0; i < 10; i++) {
    idField.setStringValue("" + i);
    writer.addDocument(document);
  }
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  assertEquals(10, ir.maxDoc());
  assertEquals(10, ir.numDocs());
  ir.close();

  IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(NoMergePolicy.INSTANCE);
  writer = new IndexWriter(dir, dontMergeConfig);
  writer.deleteDocuments(new Term("id", "0"));
  writer.deleteDocuments(new Term("id", "7"));
  writer.close();

  ir = DirectoryReader.open(dir);
  assertEquals(8, ir.numDocs());
  ir.close();

  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy()));
  assertEquals(8, writer.numDocs());
  assertEquals(10, writer.maxDoc());
  writer.forceMergeDeletes();
  assertEquals(8, writer.numDocs());
  writer.close();

  ir = DirectoryReader.open(dir);
  assertEquals(8, ir.maxDoc());
  assertEquals(8, ir.numDocs());
  ir.close();
  dir.close();
}
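Note that customType above is stored-only: a fresh FieldType defaults to IndexOptions.NONE, so the "stored" field is kept in stored fields but never indexed. A minimal sketch (not from the original test) showing the equivalent with the StoredField convenience class:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;

class StoredOnlySketch {
  // StoredField.TYPE is a frozen FieldType with stored=true and
  // IndexOptions.NONE, the same shape as the hand-built customType above.
  static Document storedOnlyDoc() {
    Document document = new Document();
    document.add(new StoredField("stored", "stored"));
    return document;
  }
}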
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestBackwardsCompatibility, method addNoProxDoc:
private void addNoProxDoc(IndexWriter writer) throws IOException {
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setIndexOptions(IndexOptions.DOCS);
  Field f = new Field("content3", "aaa", customType);
  doc.add(f);
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  customType2.setIndexOptions(IndexOptions.DOCS);
  f = new Field("content4", "aaa", customType2);
  doc.add(f);
  writer.addDocument(doc);
}
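With IndexOptions.DOCS the postings record only which documents contain each term, not term frequencies or positions, which is why the method is called addNoProxDoc (no proximity data). A minimal sketch of the same docs-only pattern with a single frozen type shared across fields (class and field names here are illustrative):

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

class DocsOnlySketch {
  // Frozen once, then shared: positions are dropped, so phrase or span
  // queries against fields of this type cannot be answered.
  static final FieldType DOCS_ONLY = new FieldType(TextField.TYPE_NOT_STORED);
  static {
    DOCS_ONLY.setIndexOptions(IndexOptions.DOCS);
    DOCS_ONLY.freeze();
  }

  static Field docsOnlyField(String name, String value) {
    return new Field(name, value, DOCS_ONLY);
  }
}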
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class DocMaker, method setConfig:
/** Set the configuration parameters of this doc maker. */
public void setConfig(Config config, ContentSource source) {
  this.config = config;
  this.source = source;

  boolean stored = config.get("doc.stored", false);
  boolean bodyStored = config.get("doc.body.stored", stored);
  boolean tokenized = config.get("doc.tokenized", true);
  boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
  boolean norms = config.get("doc.tokenized.norms", false);
  boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
  boolean bodyOffsets = config.get("doc.body.offsets", false);
  boolean termVec = config.get("doc.term.vector", false);
  boolean termVecPositions = config.get("doc.term.vector.positions", false);
  boolean termVecOffsets = config.get("doc.term.vector.offsets", false);

  valType = new FieldType(TextField.TYPE_NOT_STORED);
  valType.setStored(stored);
  valType.setTokenized(tokenized);
  valType.setOmitNorms(!norms);
  valType.setStoreTermVectors(termVec);
  valType.setStoreTermVectorPositions(termVecPositions);
  valType.setStoreTermVectorOffsets(termVecOffsets);
  valType.freeze();

  bodyValType = new FieldType(TextField.TYPE_NOT_STORED);
  bodyValType.setStored(bodyStored);
  bodyValType.setTokenized(bodyTokenized);
  bodyValType.setOmitNorms(!bodyNorms);
  if (bodyTokenized && bodyOffsets) {
    bodyValType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  }
  bodyValType.setStoreTermVectors(termVec);
  bodyValType.setStoreTermVectorPositions(termVecPositions);
  bodyValType.setStoreTermVectorOffsets(termVecOffsets);
  bodyValType.freeze();

  storeBytes = config.get("doc.store.body.bytes", false);
  reuseFields = config.get("doc.reuse.fields", true);

  // In a multi-rounds run, it is important to reset DocState since settings
  // of fields may change between rounds, and this is the only way to reset
  // the cache of all threads.
  docState = new ThreadLocal<>();

  indexProperties = config.get("doc.index.props", false);
  updateDocIDLimit = config.get("doc.random.id.limit", -1);
  if (updateDocIDLimit != -1) {
    r = new Random(179);
  }
}
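A hedged sketch of supplying these properties programmatically; this assumes the benchmark module's Config(Properties) constructor and the built-in SingleDocSource, and in a real benchmark the values would normally come from an .alg file instead:

import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.SingleDocSource;
import org.apache.lucene.benchmark.byTask.utils.Config;

class DocMakerConfigSketch {
  static DocMaker configuredDocMaker() {
    Properties props = new Properties();
    props.setProperty("doc.stored", "true");       // stored -> valType.setStored(true)
    props.setProperty("doc.term.vector", "true");  // termVec -> term vectors on both types
    props.setProperty("doc.body.offsets", "true"); // body postings include offsets
    Config config = new Config(props);

    ContentSource source = new SingleDocSource();
    source.setConfig(config);

    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);
    return docMaker;
  }
}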
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class DocMaker, method createDocument:
// create a doc
// use only part of the body, modify it to keep the rest (or use all if size==0).
// reset the docdata properties so they are not added more than once.
private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
  final DocState ds = getDocState();
  final Document doc = reuseFields ? ds.doc : new Document();
  doc.clear();

  // Set ID_FIELD
  FieldType ft = new FieldType(valType);
  ft.setStored(true);
  Field idField = ds.getField(ID_FIELD, ft);
  int id;
  if (r != null) {
    id = r.nextInt(updateDocIDLimit);
  } else {
    id = docData.getID();
    if (id == -1) {
      id = numDocsCreated.getAndIncrement();
    }
  }
  idField.setStringValue(Integer.toString(id));
  doc.add(idField);

  // Set NAME_FIELD
  String name = docData.getName();
  if (name == null) {
    name = "";
  }
  name = cnt < 0 ? name : name + "_" + cnt;
  Field nameField = ds.getField(NAME_FIELD, valType);
  nameField.setStringValue(name);
  doc.add(nameField);

  // Set DATE_FIELD
  DateUtil util = dateParsers.get();
  if (util == null) {
    util = new DateUtil();
    dateParsers.set(util);
  }
  Date date = null;
  String dateString = docData.getDate();
  if (dateString != null) {
    util.pos.setIndex(0);
    date = util.parser.parse(dateString, util.pos);
    //System.out.println(dateString + " parsed to " + date);
  } else {
    dateString = "";
  }
  Field dateStringField = ds.getField(DATE_FIELD, valType);
  dateStringField.setStringValue(dateString);
  doc.add(dateStringField);
  if (date == null) {
    // just set to right now
    date = new Date();
  }
  Field dateField = ds.getNumericField(DATE_MSEC_FIELD, Long.class);
  dateField.setLongValue(date.getTime());
  doc.add(dateField);

  util.cal.setTime(date);
  final int sec = util.cal.get(Calendar.HOUR_OF_DAY) * 3600
      + util.cal.get(Calendar.MINUTE) * 60
      + util.cal.get(Calendar.SECOND);
  Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, Integer.class);
  timeSecField.setIntValue(sec);
  doc.add(timeSecField);

  // Set TITLE_FIELD
  String title = docData.getTitle();
  Field titleField = ds.getField(TITLE_FIELD, valType);
  titleField.setStringValue(title == null ? "" : title);
  doc.add(titleField);

  String body = docData.getBody();
  if (body != null && body.length() > 0) {
    String bdy;
    if (size <= 0 || size >= body.length()) {
      // use all
      bdy = body;
      // nothing left
      docData.setBody("");
    } else {
      // attempt not to break words - if whitespace found within next 20 chars...
      for (int n = size - 1; n < size + 20 && n < body.length(); n++) {
        if (Character.isWhitespace(body.charAt(n))) {
          size = n;
          break;
        }
      }
      // use part
      bdy = body.substring(0, size);
      // some left
      docData.setBody(body.substring(size));
    }
    Field bodyField = ds.getField(BODY_FIELD, bodyValType);
    bodyField.setStringValue(bdy);
    doc.add(bodyField);
    if (storeBytes) {
      Field bytesField = ds.getField(BYTES_FIELD, StringField.TYPE_STORED);
      bytesField.setBytesValue(bdy.getBytes(StandardCharsets.UTF_8));
      doc.add(bytesField);
    }
  }

  if (indexProperties) {
    Properties props = docData.getProps();
    if (props != null) {
      for (final Map.Entry<Object, Object> entry : props.entrySet()) {
        Field f = ds.getField((String) entry.getKey(), valType);
        f.setStringValue((String) entry.getValue());
        doc.add(f);
      }
      docData.setProps(null);
    }
  }

  //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
  return doc;
}
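The reuseFields path above leans on a core Lucene idiom: Field instances are mutable, so one Document and its Fields can be created once and updated with setStringValue() between addDocument() calls instead of being reallocated per document. A minimal standalone sketch of that pattern (RAMDirectory and the field name are illustrative choices, not from DocMaker):

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

class FieldReuseSketch {
  static void indexReusingFields(String[] ids) throws IOException {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      // One Document and one Field, reused for every input value.
      Document doc = new Document();
      Field idField = new StringField("id", "", Field.Store.YES);
      doc.add(idField);
      for (String id : ids) {
        idField.setStringValue(id); // mutate in place instead of reallocating
        writer.addDocument(doc);
      }
    }
  }
}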
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestBackwardsCompatibility, method addDoc:
private void addDoc(IndexWriter writer, int id) throws IOException {
  Document doc = new Document();
  doc.add(new TextField("content", "aaa", Field.Store.NO));
  doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setStoreTermVectors(true);
  customType2.setStoreTermVectorPositions(true);
  customType2.setStoreTermVectorOffsets(true);
doc.add(new Field("autf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("utf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
doc.add(new Field("fieā±·ld", "field with non-ascii name", customType2));
  // add docvalues fields
  doc.add(new NumericDocValuesField("dvByte", (byte) id));
  byte[] bytes = new byte[] {
      (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id
  };
  BytesRef ref = new BytesRef(bytes);
  doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
  doc.add(new DoubleDocValuesField("dvDouble", (double) id));
  doc.add(new FloatDocValuesField("dvFloat", (float) id));
  doc.add(new NumericDocValuesField("dvInt", id));
  doc.add(new NumericDocValuesField("dvLong", id));
  doc.add(new NumericDocValuesField("dvPacked", id));
  doc.add(new NumericDocValuesField("dvShort", (short) id));
  doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
  doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));

  doc.add(new IntPoint("intPoint1d", id));
  doc.add(new IntPoint("intPoint2d", id, 2 * id));
  doc.add(new FloatPoint("floatPoint1d", (float) id));
  doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
  doc.add(new LongPoint("longPoint1d", id));
  doc.add(new LongPoint("longPoint2d", id, 2 * id));
  doc.add(new DoublePoint("doublePoint1d", (double) id));
  doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
  doc.add(new BinaryPoint("binaryPoint1d", bytes));
  doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));

  // a field with both offsets and term vectors for a cross-check
  FieldType customType3 = new FieldType(TextField.TYPE_STORED);
  customType3.setStoreTermVectors(true);
  customType3.setStoreTermVectorPositions(true);
  customType3.setStoreTermVectorOffsets(true);
  customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));

  // a field that omits only positions
  FieldType customType4 = new FieldType(TextField.TYPE_STORED);
  customType4.setStoreTermVectors(true);
  customType4.setStoreTermVectorPositions(false);
  customType4.setStoreTermVectorOffsets(true);
  customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));

  // TODO:
  // index different norms types via similarity (we use a random one currently?!)
  // remove any analyzer randomness, explicitly add payloads for certain fields.
  writer.addDocument(doc);
}
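The dimensional point fields above are queried back through factory methods on each point class. A hedged sketch (not part of the original test) that counts matches against the single-dimension "intPoint1d" field written by addDoc:

import java.io.IOException;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;

class PointQuerySketch {
  // Exact-match and inclusive-range queries over "intPoint1d".
  static int countById(Directory dir, int id) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Query exact = IntPoint.newExactQuery("intPoint1d", id);    // point == id
      Query range = IntPoint.newRangeQuery("intPoint1d", 0, id); // 0 <= point <= id
      return searcher.count(exact) + searcher.count(range);
    }
  }
}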