Use of org.apache.lucene.index.IndexableField in the lucene-solr project (Apache).
The doLogic method of the WriteLineDocTask class.
/**
 * Writes one benchmark document as a single line of tab-separated field values.
 * Tabs, carriage returns and newlines inside field values are collapsed to a
 * single space so each document stays on one physical line. A line is emitted
 * only if at least one "sufficient" field is non-empty (or if that check is
 * disabled via checkSufficientFields).
 *
 * @return 1, the number of documents processed
 */
@Override
public int doLogic() throws Exception {
  // Produce the next document, honoring the configured size limit when set.
  final Document doc;
  if (docSize > 0) {
    doc = docMaker.makeDocument(docSize);
  } else {
    doc = docMaker.makeDocument();
  }

  // Lazily build the per-thread whitespace normalizer (tabs/newlines -> space).
  Matcher normalizer = threadNormalizer.get();
  if (normalizer == null) {
    normalizer = Pattern.compile("[\t\r\n]+").matcher("");
    threadNormalizer.set(normalizer);
  }

  // Lazily build the per-thread line buffer, then reset it for this document.
  StringBuilder line = threadBuffer.get();
  if (line == null) {
    line = new StringBuilder();
    threadBuffer.set(line);
  }
  line.setLength(0);

  // When the sufficient-fields check is disabled, every document qualifies.
  boolean sufficient = !checkSufficientFields;
  for (int i = 0; i < fieldsToWrite.length; i++) {
    final IndexableField field = doc.getField(fieldsToWrite[i]);
    String text = "";
    if (field != null) {
      text = normalizer.reset(field.stringValue()).replaceAll(" ").trim();
    }
    line.append(text).append(SEP);
    // A document is sufficient once any designated field carries text.
    sufficient |= text.length() > 0 && sufficientFields[i];
  }

  if (sufficient) {
    // remove redundant last separator
    line.setLength(line.length() - 1);
    // lineFileOut is a PrintWriter, which synchronizes internally in println.
    lineFileOut(doc).println(line.toString());
  }
  return 1;
}
Use of org.apache.lucene.index.IndexableField in the lucene-solr project (Apache).
The toString method of the Document class.
/** Prints the fields of a document for human consumption. */
@Override
public final String toString() {
  StringBuilder buffer = new StringBuilder("Document<");
  // Join the fields' string forms with a single space between them.
  String separator = "";
  for (IndexableField field : fields) {
    buffer.append(separator).append(field.toString());
    separator = " ";
  }
  return buffer.append(">").toString();
}
Use of org.apache.lucene.index.IndexableField in the lucene-solr project (Apache).
The testPointFieldType method of the PolyFieldTest class.
/**
 * Verifies that the "home" poly (point) field expands into the expected set of
 * sub-fields, that the non-stored variant ("home_ns") produces exactly one field
 * fewer, that malformed point input is rejected, and that equal schema fields
 * yield equal (and consistently hashed) value sources.
 */
@Test
public void testPointFieldType() throws Exception {
  SolrCore core = h.getCore();
  IndexSchema schema = core.getLatestSchema();
  SchemaField home = schema.getField("home");
  assertNotNull(home);
  assertTrue("home is not a poly field", home.isPolyField());
  FieldType tmp = home.getType();
  assertTrue(tmp instanceof PointType);
  PointType pt = (PointType) tmp;
  // expected value comes first in JUnit's assertEquals
  assertEquals(2, pt.getDimension());
  double[] xy = new double[] { 35.0, -79.34 };
  String point = xy[0] + "," + xy[1];
  List<IndexableField> fields = home.createFields(point);
  assertNotNull(pt.getSubType());
  // If docValues=false, we expect one field per dimension plus a stored field.
  int expectedNumFields = 3;
  if (pt.subField(home, 0, schema).hasDocValues()) {
    // With docValues=true, each dimension also gets a docValues field.
    expectedNumFields += 2;
  }
  assertEquals("Unexpected fields created: " + Arrays.toString(fields.toArray()),
      expectedNumFields, fields.size());
  // The value-carrying fields come first; the last one is stored and holds the original.
  for (int i = 0; i < expectedNumFields; i++) {
    boolean hasValue = fields.get(i).binaryValue() != null
        || fields.get(i).stringValue() != null
        || fields.get(i).numericValue() != null;
    assertTrue("Doesn't have a value: " + fields.get(i), hasValue);
  }
  home = schema.getField("home_ns");
  assertNotNull(home);
  fields = home.createFields(point);
  // One less field than with "home", since "home_ns" is not stored. The original
  // code used the deprecated 3-arg double assertEquals with a delta of 2 here,
  // which made this check nearly meaningless; assert the exact count instead.
  assertEquals("Unexpected field count for non-stored poly field",
      expectedNumFields - 1, fields.size());
  // Malformed input (non-numeric second component) must be rejected.
  try {
    home.createFields("35.0,foo");
    fail("createFields should have rejected the malformed point \"35.0,foo\"");
  } catch (Exception expected) {
    // expected: "foo" is not a valid coordinate
  }
  SchemaField s1 = schema.getField("test_p");
  SchemaField s2 = schema.getField("test_p");
  ValueSource v1 = s1.getType().getValueSource(s1, null);
  ValueSource v2 = s2.getType().getValueSource(s2, null);
  // Equal schema fields must produce value sources that are equal and hash alike.
  assertEquals(v1, v2);
  assertEquals(v1.hashCode(), v2.hashCode());
}
Use of org.apache.lucene.index.IndexableField in the lucene-solr project (Apache).
The analyzeSeedDocument method of the SimpleNaiveBayesDocumentClassifier class.
/**
 * Performs the analysis for the seed document and extracts the boosts if present.
 * A boost is encoded as a {@code "fieldName^boost"} suffix in the configured
 * text field names. This is done only one time for the seed document.
 *
 * @param inputDocument the seed unseen document
 * @param fieldName2tokensArray a map that associates to a field name the list of token arrays for all its values
 * @param fieldName2boost a map that associates the boost to the field
 * @throws IOException If there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray, Map<String, Float> fieldName2boost) throws IOException {
  for (int i = 0; i < textFieldNames.length; i++) {
    String fieldName = textFieldNames[i];
    float boost = 1;
    List<String[]> tokenizedValues = new LinkedList<>();
    // Split an optional "^boost" suffix off the configured field name.
    if (fieldName.contains("^")) {
      String[] nameAndBoost = fieldName.split("\\^");
      fieldName = nameAndBoost[0];
      boost = Float.parseFloat(nameAndBoost[1]);
    }
    // Tokenize every stored value of this field with its configured analyzer.
    for (IndexableField fieldValue : inputDocument.getFields(fieldName)) {
      TokenStream fieldTokens = fieldValue.tokenStream(field2analyzer.get(fieldName), null);
      tokenizedValues.add(getTokenArray(fieldTokens));
    }
    fieldName2tokensArray.put(fieldName, tokenizedValues);
    fieldName2boost.put(fieldName, boost);
    // Store the bare name back so later lookups don't see the boost suffix.
    textFieldNames[i] = fieldName;
  }
}
Use of org.apache.lucene.index.IndexableField in the lucene-solr project (Apache).
The createNewDoc method of the DatasetSplitter class.
/**
 * Builds a new Document that copies fields of the hit identified by {@code scoreDoc}
 * from the original index, re-typing every copied field as {@code ft}.
 *
 * @param originalIndex the reader for the index being split
 * @param ft the field type applied to every copied field
 * @param scoreDoc the hit whose stored document is copied
 * @param fieldNames the fields to copy; when null or empty, all fields are copied
 * @return the newly assembled document
 * @throws IOException if the stored document cannot be read
 */
private Document createNewDoc(IndexReader originalIndex, FieldType ft, ScoreDoc scoreDoc, String[] fieldNames) throws IOException {
  Document result = new Document();
  Document source = originalIndex.document(scoreDoc.doc);
  if (fieldNames == null || fieldNames.length == 0) {
    // No explicit selection: copy every field, choosing the first available
    // value representation (reader, binary, string, then numeric-as-string).
    for (IndexableField original : source.getFields()) {
      if (original.readerValue() != null) {
        result.add(new Field(original.name(), original.readerValue(), ft));
      } else if (original.binaryValue() != null) {
        result.add(new Field(original.name(), original.binaryValue(), ft));
      } else if (original.stringValue() != null) {
        result.add(new Field(original.name(), original.stringValue(), ft));
      } else if (original.numericValue() != null) {
        result.add(new Field(original.name(), original.numericValue().toString(), ft));
      }
    }
  } else {
    // Copy only the requested fields, as their string values.
    for (String name : fieldNames) {
      IndexableField original = source.getField(name);
      if (original != null) {
        result.add(new Field(name, original.stringValue(), ft));
      }
    }
  }
  return result;
}
Aggregations