Search in sources :

Example 86 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class WriteLineDocTask method doLogic.

/**
 * Builds one document and writes its selected fields as a single line.
 *
 * <p>For every name in {@code fieldsToWrite} the field's string value is
 * normalized (each run of tab / CR / LF becomes one space, then trimmed) and
 * appended to the line followed by {@code SEP}. A field that is absent, or that
 * has no string value, contributes an empty column. The line is only written
 * when {@code checkSufficientFields} is off or at least one field flagged in
 * {@code sufficientFields} produced non-empty text.
 *
 * @return always 1
 * @throws Exception if creating the document or obtaining the output fails
 */
@Override
public int doLogic() throws Exception {
    Document doc = docSize > 0 ? docMaker.makeDocument(docSize) : docMaker.makeDocument();
    // Reuse a cached Matcher instead of compiling the pattern on every call.
    Matcher matcher = threadNormalizer.get();
    if (matcher == null) {
        matcher = Pattern.compile("[\t\r\n]+").matcher("");
        threadNormalizer.set(matcher);
    }
    // Reuse a cached buffer as well; it is cleared before each use.
    StringBuilder sb = threadBuffer.get();
    if (sb == null) {
        sb = new StringBuilder();
        threadBuffer.set(sb);
    }
    sb.setLength(0);
    boolean sufficient = !checkSufficientFields;
    for (int i = 0; i < fieldsToWrite.length; i++) {
        IndexableField f = doc.getField(fieldsToWrite[i]);
        // stringValue() is null for fields without a string value (e.g. binary-only);
        // Matcher.reset(null) would throw, so such fields are written as an empty column.
        String raw = f == null ? null : f.stringValue();
        String text = raw == null ? "" : matcher.reset(raw).replaceAll(" ").trim();
        sb.append(text).append(SEP);
        sufficient |= text.length() > 0 && sufficientFields[i];
    }
    if (sufficient) {
        // remove redundant last separator
        // NOTE(review): this drops exactly one char, so it assumes SEP is a single character - confirm.
        sb.setLength(sb.length() - 1);
        // lineFileOut is a PrintWriter, which synchronizes internally in println.
        lineFileOut(doc).println(sb.toString());
    }
    return 1;
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) Matcher(java.util.regex.Matcher) Document(org.apache.lucene.document.Document)

Example 87 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class Document method toString.

/**
 * Renders this document for human reading as {@code Document<f1 f2 ...>},
 * with the string form of each field separated by a single space.
 */
@Override
public final String toString() {
    final StringBuilder out = new StringBuilder("Document<");
    // Empty before the first field, a single space before every later one.
    String separator = "";
    for (IndexableField current : fields) {
        out.append(separator).append(current.toString());
        separator = " ";
    }
    return out.append(">").toString();
}
Also used : IndexableField(org.apache.lucene.index.IndexableField)

Example 88 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class PolyFieldTest method testPointFieldType.

/**
 * Checks field creation for the poly field {@code home} and its non-stored
 * counterpart {@code home_ns}: the field type and dimension, the number of
 * fields created for a point, that every created field carries a value, that a
 * malformed point is rejected, and that value sources obtained for the same
 * schema field are equal with equal hash codes.
 */
@Test
public void testPointFieldType() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getLatestSchema();
    SchemaField home = schema.getField("home");
    assertNotNull(home);
    assertTrue("home is not a poly field", home.isPolyField());
    FieldType tmp = home.getType();
    assertTrue(tmp instanceof PointType);
    PointType pt = (PointType) tmp;
    // JUnit argument order is (expected, actual); the swapped form produced a misleading failure message.
    assertEquals(2, pt.getDimension());
    double[] xy = new double[] { 35.0, -79.34 };
    String point = xy[0] + "," + xy[1];
    List<IndexableField> fields = home.createFields(point);
    assertNotNull(pt.getSubType());
    //If DV=false, we expect one field per dimension plus a stored field
    int expectdNumFields = 3;
    if (pt.subField(home, 0, schema).hasDocValues()) {
        // If docValues=true, then we expect two more fields
        expectdNumFields += 2;
    }
    assertEquals("Unexpected fields created: " + Arrays.toString(fields.toArray()), expectdNumFields, fields.size());
    //first two/four fields contain the values, last one is just stored and contains the original
    for (int i = 0; i < expectdNumFields; i++) {
        IndexableField created = fields.get(i);
        boolean hasValue = created.binaryValue() != null || created.stringValue() != null || created.numericValue() != null;
        assertTrue("Doesn't have a value: " + created, hasValue);
    }
    home = schema.getField("home_ns");
    assertNotNull(home);
    fields = home.createFields(point);
    //one less field than with "home", since we aren't storing
    // Exact comparison: the previous three-argument call selected the floating-point
    // overload with a delta of 2, which accepted any count within +/-2 of the expectation.
    assertEquals("Unexpected fields created: " + Arrays.toString(fields.toArray()), expectdNumFields - 1, fields.size());
    // A non-numeric coordinate must be rejected with an exception.
    boolean rejected = false;
    try {
        home.createFields("35.0,foo");
    } catch (Exception expected) {
        // expected: the second coordinate is not a number
        rejected = true;
    }
    assertTrue("createFields should have failed for the malformed point '35.0,foo'", rejected);
    // Two lookups of the same field must yield equal value sources with equal hash codes.
    SchemaField s1 = schema.getField("test_p");
    SchemaField s2 = schema.getField("test_p");
    ValueSource v1 = s1.getType().getValueSource(s1, null);
    ValueSource v2 = s2.getType().getValueSource(s2, null);
    assertEquals(v1, v2);
    assertEquals(v1.hashCode(), v2.hashCode());
}
Also used : SolrCore(org.apache.solr.core.SolrCore) SolrException(org.apache.solr.common.SolrException) IndexableField(org.apache.lucene.index.IndexableField) ValueSource(org.apache.lucene.queries.function.ValueSource) Test(org.junit.Test)

Example 89 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class SimpleNaiveBayesDocumentClassifier method analyzeSeedDocument.

/**
 * Analyzes the seed document once, collecting for each configured text field the
 * token arrays of all its values and the boost attached to the field.
 *
 * <p>A configured name of the form {@code name^boost} is split on {@code ^}: the
 * first part becomes the field name, the second is parsed as the boost, and the
 * entry in {@code textFieldNames} is overwritten with the bare name. Names without
 * {@code ^} get a boost of 1.
 *
 * @param inputDocument         the seed unseen document
 * @param fieldName2tokensArray filled with, per field name, the list of token arrays of all its values
 * @param fieldName2boost       filled with, per field name, its boost
 * @throws IOException If there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray, Map<String, Float> fieldName2boost) throws IOException {
    for (int idx = 0; idx < textFieldNames.length; idx++) {
        String name = textFieldNames[idx];
        float weight = 1;
        if (name.contains("^")) {
            String[] nameAndBoost = name.split("\\^");
            name = nameAndBoost[0];
            weight = Float.parseFloat(nameAndBoost[1]);
        }
        // One token array per value of the (possibly multi-valued) field.
        List<String[]> tokensPerValue = new LinkedList<>();
        for (IndexableField value : inputDocument.getFields(name)) {
            TokenStream stream = value.tokenStream(field2analyzer.get(name), null);
            tokensPerValue.add(getTokenArray(stream));
        }
        fieldName2tokensArray.put(name, tokensPerValue);
        fieldName2boost.put(name, weight);
        // Store the bare name back so the boost suffix is gone from the configured names.
        textFieldNames[idx] = name;
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) TokenStream(org.apache.lucene.analysis.TokenStream) LinkedList(java.util.LinkedList)

Example 90 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DatasetSplitter method createNewDoc.

/**
 * Builds a new document containing copies of fields from the document that
 * {@code scoreDoc} refers to in {@code originalIndex}, each re-created with the
 * field type {@code ft}.
 *
 * @param originalIndex reader the source document is loaded from
 * @param ft            field type applied to every copied field
 * @param scoreDoc      hit identifying the source document
 * @param fieldNames    names of the fields to copy; when {@code null} or empty, all fields are copied
 * @return the newly built document
 * @throws IOException if the source document cannot be loaded
 */
private Document createNewDoc(IndexReader originalIndex, FieldType ft, ScoreDoc scoreDoc, String[] fieldNames) throws IOException {
    Document doc = new Document();
    Document document = originalIndex.document(scoreDoc.doc);
    if (fieldNames != null && fieldNames.length > 0) {
        for (String fieldName : fieldNames) {
            IndexableField field = document.getField(fieldName);
            if (field != null) {
                // Same value-kind dispatch as the copy-all branch, so a requested field
                // without a string value is copied instead of passing null to Field.
                addFieldCopy(doc, field, ft);
            }
        }
    } else {
        for (IndexableField field : document.getFields()) {
            addFieldCopy(doc, field, ft);
        }
    }
    return doc;
}

/**
 * Adds to {@code target} a copy of {@code source} using the first available value,
 * checked in the order reader, binary, string, numeric (numeric is copied via
 * {@code toString()}). A field exposing none of these is skipped.
 */
private void addFieldCopy(Document target, IndexableField source, FieldType ft) {
    if (source.readerValue() != null) {
        target.add(new Field(source.name(), source.readerValue(), ft));
    } else if (source.binaryValue() != null) {
        target.add(new Field(source.name(), source.binaryValue(), ft));
    } else if (source.stringValue() != null) {
        target.add(new Field(source.name(), source.stringValue(), ft));
    } else if (source.numericValue() != null) {
        target.add(new Field(source.name(), source.numericValue().toString(), ft));
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document)

Aggregations

IndexableField (org.apache.lucene.index.IndexableField) 276, Document (org.apache.lucene.document.Document) 90, CompressedXContent (org.elasticsearch.common.compress.CompressedXContent) 75, Matchers.containsString (org.hamcrest.Matchers.containsString) 57, BytesRef (org.apache.lucene.util.BytesRef) 53, ArrayList (java.util.ArrayList) 50, Field (org.apache.lucene.document.Field) 34, Test (org.junit.Test) 28, IOException (java.io.IOException) 27, HashMap (java.util.HashMap) 24, IndexReader (org.apache.lucene.index.IndexReader) 24, Directory (org.apache.lucene.store.Directory) 23, Map (java.util.Map) 22, TopDocs (org.apache.lucene.search.TopDocs) 22, Term (org.apache.lucene.index.Term) 21, HashSet (java.util.HashSet) 20, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig) 20, DocumentMapper (org.elasticsearch.index.mapper.DocumentMapper) 20, Query (org.apache.lucene.search.Query) 19, Analyzer (org.apache.lucene.analysis.Analyzer) 18