Search in sources:

Example 76 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

In class QueryComponent, the method mergeIds:

/**
 * Merges the per-shard results in {@code sreq.responses} into a single globally
 * ordered list of document ids for this distributed request.
 *
 * <p>Pluggable {@link MergeStrategy} instances (if any) run first and may take over
 * id merging entirely. Otherwise, shard hits are merged via a bounded priority queue
 * sized to {@code offset + rows}, duplicate ids across shards are dropped, and the
 * final ordered id-&gt;ShardDoc mapping is stored in {@code rb.resultIds} alongside a
 * placeholder {@link SolrDocumentList} (real stored fields are fetched later).
 * Shard-level metadata ({@code shards.info}, partialResults,
 * segmentTerminatedEarly) is aggregated into the response as a side effect.
 *
 * @param rb   response builder holding the sort spec, schema, and merged output
 * @param sreq the shard request whose {@code responses} are being merged
 */
protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
    // Custom merge strategies (e.g. from RankQuery plugins) get first crack; if any
    // of them merges ids itself, the default merge below is skipped entirely.
    List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
    if (mergeStrategies != null) {
        Collections.sort(mergeStrategies, MergeStrategy.MERGE_COMP);
        boolean idsMerged = false;
        for (MergeStrategy mergeStrategy : mergeStrategies) {
            mergeStrategy.merge(rb, sreq);
            if (mergeStrategy.mergesIds()) {
                idsMerged = true;
            }
        }
        if (idsMerged) {
            // ids were merged by a strategy above, so the default merge is not needed.
            return;
        }
    }
    SortSpec ss = rb.getSortSpec();
    Sort sort = ss.getSort();
    SortField[] sortFields = null;
    if (sort != null)
        sortFields = sort.getSort();
    else {
        // No explicit sort: fall back to relevance (score) ordering.
        sortFields = new SortField[] { SortField.FIELD_SCORE };
    }
    IndexSchema schema = rb.req.getSchema();
    SchemaField uniqueKeyField = schema.getUniqueKeyField();
    // id to shard mapping, to eliminate any accidental dups
    HashMap<Object, String> uniqueDoc = new HashMap<>();
    // Merge the docs via a priority queue so we don't have to sort *all* of the
    // documents... we only need to order the top (rows+start)
    final ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
    NamedList<Object> shardInfo = null;
    if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
        // Client asked for per-shard diagnostics; attach the container to the response
        // now so it is populated as each shard response is processed below.
        shardInfo = new SimpleOrderedMap<>();
        rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
    }
    long numFound = 0;
    Float maxScore = null;
    boolean partialResults = false;
    // Tri-state: null = no shard reported the flag; TRUE wins over FALSE once seen.
    Boolean segmentTerminatedEarly = null;
    for (ShardResponse srsp : sreq.responses) {
        SolrDocumentList docs = null;
        NamedList<?> responseHeader = null;
        if (shardInfo != null) {
            // Build the shards.info entry for this shard: either the error details
            // (with stack trace) or numFound/maxScore plus timing.
            SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
            if (srsp.getException() != null) {
                Throwable t = srsp.getException();
                if (t instanceof SolrServerException) {
                    // Report the underlying cause rather than the transport wrapper.
                    t = ((SolrServerException) t).getCause();
                }
                nl.add("error", t.toString());
                StringWriter trace = new StringWriter();
                t.printStackTrace(new PrintWriter(trace));
                nl.add("trace", trace.toString());
                if (srsp.getShardAddress() != null) {
                    nl.add("shardAddress", srsp.getShardAddress());
                }
            } else {
                responseHeader = (NamedList<?>) srsp.getSolrResponse().getResponse().get("responseHeader");
                final Object rhste = (responseHeader == null ? null : responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY));
                if (rhste != null) {
                    nl.add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, rhste);
                }
                docs = (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");
                nl.add("numFound", docs.getNumFound());
                nl.add("maxScore", docs.getMaxScore());
                nl.add("shardAddress", srsp.getShardAddress());
            }
            if (srsp.getSolrResponse() != null) {
                nl.add("time", srsp.getSolrResponse().getElapsedTime());
            }
            shardInfo.add(srsp.getShard(), nl);
        }
        // now that we've added the shard info, let's only proceed if we have no error.
        if (srsp.getException() != null) {
            // A failed shard makes the overall result partial, but does not abort the merge.
            partialResults = true;
            continue;
        }
        if (docs == null) {
            // could have been initialized in the shards info block above
            docs = (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");
        }
        if (responseHeader == null) {
            // could have been initialized in the shards info block above
            responseHeader = (NamedList<?>) srsp.getSolrResponse().getResponse().get("responseHeader");
        }
        if (responseHeader != null) {
            if (Boolean.TRUE.equals(responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY))) {
                partialResults = true;
            }
            if (!Boolean.TRUE.equals(segmentTerminatedEarly)) {
                // Once any shard reports TRUE, it is sticky; otherwise record the latest
                // explicit TRUE/FALSE value seen.
                final Object ste = responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY);
                if (Boolean.TRUE.equals(ste)) {
                    segmentTerminatedEarly = Boolean.TRUE;
                } else if (Boolean.FALSE.equals(ste)) {
                    segmentTerminatedEarly = Boolean.FALSE;
                }
            }
        }
        // calculate global maxScore and numDocsFound
        if (docs.getMaxScore() != null) {
            maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
        }
        numFound += docs.getNumFound();
        // Shard-serialized sort values, unmarshalled into comparable form via the schema.
        NamedList sortFieldValues = (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));
        NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema);
        // put it in the priority queue so it can be ordered.
        for (int i = 0; i < docs.size(); i++) {
            SolrDocument doc = docs.get(i);
            Object id = doc.getFieldValue(uniqueKeyField.getName());
            String prevShard = uniqueDoc.put(id, srsp.getShard());
            if (prevShard != null) {
                // Duplicate unique key seen on an earlier shard: skip this copy and
                // correct the global hit count. NOTE: the earlier copy stays in the
                // queue; if we could remove arbitrary entries from the priority queue,
                // the choice of which duplicate wins could be made deterministic:
                numFound--;
                continue;
            // make which duplicate is used deterministic based on shard
            // if (prevShard.compareTo(srsp.shard) >= 0) {
            //  TODO: remove previous from priority queue
            //  continue;
            // }
            }
            ShardDoc shardDoc = new ShardDoc();
            shardDoc.id = id;
            shardDoc.shard = srsp.getShard();
            shardDoc.orderInShard = i;
            Object scoreObj = doc.getFieldValue("score");
            if (scoreObj != null) {
                // Score may arrive as a String depending on the response format.
                if (scoreObj instanceof String) {
                    shardDoc.score = Float.parseFloat((String) scoreObj);
                } else {
                    shardDoc.score = (Float) scoreObj;
                }
            }
            shardDoc.sortFieldValues = unmarshalledSortFieldValues;
            // Bounded insert: queue holds at most offset+rows entries, evicting the worst.
            queue.insertWithOverflow(shardDoc);
        }
    // end for-each-doc-in-response
    }
    // end for-each-response
    // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
    // So we want to pop the last documents off the queue to get
    // the docs offset -> queuesize
    int resultSize = queue.size() - ss.getOffset();
    // there may not be any docs in range
    resultSize = Math.max(0, resultSize);
    Map<Object, ShardDoc> resultIds = new HashMap<>();
    // pop() yields worst-first, so filling positions from the back produces best-first order.
    for (int i = resultSize - 1; i >= 0; i--) {
        ShardDoc shardDoc = queue.pop();
        shardDoc.positionInResponse = i;
        // Need the toString() for correlation with other lists that must
        // be strings (like keys in highlighting, explain, etc)
        resultIds.put(shardDoc.id.toString(), shardDoc);
    }
    // Add hits for distributed requests
    // https://issues.apache.org/jira/browse/SOLR-3518
    rb.rsp.addToLog("hits", numFound);
    SolrDocumentList responseDocs = new SolrDocumentList();
    if (maxScore != null)
        responseDocs.setMaxScore(maxScore);
    responseDocs.setNumFound(numFound);
    responseDocs.setStart(ss.getOffset());
    // size appropriately: placeholders are replaced when stored fields are retrieved.
    for (int i = 0; i < resultSize; i++) responseDocs.add(null);
    // save these results in a private area so we can access them
    // again when retrieving stored fields.
    // TODO: use ResponseBuilder (w/ comments) or the request context?
    rb.resultIds = resultIds;
    rb.setResponseDocs(responseDocs);
    populateNextCursorMarkFromMergedShards(rb);
    if (partialResults) {
        // Only add the flag if no earlier stage already set it.
        if (rb.rsp.getResponseHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY) == null) {
            rb.rsp.getResponseHeader().add(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY, Boolean.TRUE);
        }
    }
    if (segmentTerminatedEarly != null) {
        // Merge into any existing header value; a TRUE from this request upgrades a
        // previously recorded non-TRUE value (remove+add since NamedList has no replace).
        final Object existingSegmentTerminatedEarly = rb.rsp.getResponseHeader().get(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY);
        if (existingSegmentTerminatedEarly == null) {
            rb.rsp.getResponseHeader().add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, segmentTerminatedEarly);
        } else if (!Boolean.TRUE.equals(existingSegmentTerminatedEarly) && Boolean.TRUE.equals(segmentTerminatedEarly)) {
            rb.rsp.getResponseHeader().remove(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY);
            rb.rsp.getResponseHeader().add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, segmentTerminatedEarly);
        }
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SortField(org.apache.lucene.search.SortField) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrDocument(org.apache.solr.common.SolrDocument) StringWriter(java.io.StringWriter) Sort(org.apache.lucene.search.Sort) PrintWriter(java.io.PrintWriter) NamedList(org.apache.solr.common.util.NamedList) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SchemaField(org.apache.solr.schema.SchemaField) IndexSchema(org.apache.solr.schema.IndexSchema) SortSpec(org.apache.solr.search.SortSpec)

Example 77 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

In class FileBasedSpellChecker, the method loadExternalFileDictionary:

/**
 * (Re)loads this spell checker's dictionary from the configured external source file.
 *
 * <p>If {@code fieldTypeName} names an existing field type in the schema, each line of
 * the source file is analyzed with that type's index analyzer into a transient
 * in-memory index, and a {@link HighFrequencyDictionary} is built over it. Otherwise
 * the file is wrapped directly in a {@link PlainTextDictionary}, honoring
 * {@code characterEncoding} when one is configured.
 *
 * <p>On I/O failure the error is logged and {@code dictionary} is left unchanged
 * (best-effort load; callers are not expected to handle a throw here).
 *
 * @param core     the core providing the resource loader (and latest schema when
 *                 {@code searcher} is null)
 * @param searcher searcher whose schema should be used; may be null
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
    try {
        IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
        // Get the field's analyzer
        if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
            FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
            // Do index-time analysis using the given fieldType's analyzer
            RAMDirectory ramDir = new RAMDirectory();
            LogMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setMergeFactor(300);
            // try-with-resources ensures the writer is closed even if reading the
            // source file or adding a document throws (the original leaked it here).
            try (IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(fieldType.getIndexAnalyzer()).setMaxBufferedDocs(150).setMergePolicy(mp).setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
                List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);
                for (String s : lines) {
                    Document d = new Document();
                    d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
                    writer.addDocument(d);
                }
                // Collapse to a single segment so the dictionary reads one term stream.
                writer.forceMerge(1);
            }
            dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
        } else {
            // check if character encoding is defined
            if (characterEncoding == null) {
                dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
            } else {
                dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
            }
        }
    } catch (IOException e) {
        log.error("Unable to load spellings", e);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.solr.schema.FieldType) HighFrequencyDictionary(org.apache.lucene.search.spell.HighFrequencyDictionary) LogByteSizeMergePolicy(org.apache.lucene.index.LogByteSizeMergePolicy) IndexWriter(org.apache.lucene.index.IndexWriter) PlainTextDictionary(org.apache.lucene.search.spell.PlainTextDictionary) LogMergePolicy(org.apache.lucene.index.LogMergePolicy) TextField(org.apache.lucene.document.TextField) IndexSchema(org.apache.solr.schema.IndexSchema) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 78 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

In class AddSchemaFieldsUpdateProcessorFactoryTest, the method testSingleFieldRoundTrip:

/**
 * Adding a document with an unknown float-valued field through the "add-fields"
 * chain must create the field in the schema with type "tfloat", and the value
 * must survive a commit/query round trip.
 */
public void testSingleFieldRoundTrip() throws Exception {
    final String fieldName = "newfield2";
    // Field must not exist before the update runs.
    IndexSchema schemaBefore = h.getCore().getLatestSchema();
    assertNull(schemaBefore.getFieldOrNull(fieldName));
    Float floatValue = -13258.992f;
    SolrInputDocument added = processAdd("add-fields", doc(f("id", "2"), f(fieldName, floatValue)));
    assertNotNull(added);
    // The processor chain should have mutated the live schema.
    IndexSchema schemaAfter = h.getCore().getLatestSchema();
    assertNotNull(schemaAfter.getFieldOrNull(fieldName));
    assertEquals("tfloat", schemaAfter.getFieldType(fieldName).getTypeName());
    assertU(commit());
    // Verify the indexed value comes back on query.
    assertQ(req("id:2"), "//arr[@name='" + fieldName + "']/float[.='" + floatValue.toString() + "']");
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) IndexSchema(org.apache.solr.schema.IndexSchema)

Example 79 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

In class AddSchemaFieldsUpdateProcessorFactoryTest, the method testSingleField:

/**
 * Adding a document with an unknown date-valued field through the
 * "add-fields-no-run-processor" chain must create the field in the schema
 * with type "tdate" (no commit/query round trip in this variant).
 */
public void testSingleField() throws Exception {
    final String fieldName = "newfield1";
    // Field must not exist before the update runs.
    IndexSchema schemaBefore = h.getCore().getLatestSchema();
    assertNull(schemaBefore.getFieldOrNull(fieldName));
    String dateString = "2010-11-12T13:14:15.168Z";
    Date date = ISODateTimeFormat.dateTime().parseDateTime(dateString).toDate();
    SolrInputDocument added = processAdd("add-fields-no-run-processor", doc(f("id", "1"), f(fieldName, date)));
    assertNotNull(added);
    // The processor chain should have mutated the live schema.
    IndexSchema schemaAfter = h.getCore().getLatestSchema();
    assertNotNull(schemaAfter.getFieldOrNull(fieldName));
    assertEquals("tdate", schemaAfter.getFieldType(fieldName).getTypeName());
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) IndexSchema(org.apache.solr.schema.IndexSchema) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Date(java.util.Date)

Example 80 with IndexSchema

use of org.apache.solr.schema.IndexSchema in project lucene-solr by apache.

In class AddSchemaFieldsUpdateProcessorFactoryTest, the method testMultipleFieldsRoundTrip:

/**
 * Two unknown multi-valued fields added in one document must each get the widest
 * type that fits all their values (Float+Double+Long -&gt; "tdouble";
 * Integer+Long -&gt; "tlong"), and every value must survive a commit/query
 * round trip.
 */
public void testMultipleFieldsRoundTrip() throws Exception {
    final String fieldName1 = "newfield5";
    final String fieldName2 = "newfield6";
    // Neither field may exist before the update runs.
    IndexSchema schemaBefore = h.getCore().getLatestSchema();
    assertNull(schemaBefore.getFieldOrNull(fieldName1));
    assertNull(schemaBefore.getFieldOrNull(fieldName2));
    // Boxed types are deliberate: toString()/doubleValue() below must render
    // exactly as the indexed values do.
    Float field1Value1 = -13258.0f;
    Double field1Value2 = 8.4828800808E10;
    Long field1Value3 = 999L;
    Integer field2Value1 = 55123;
    Long field2Value2 = 1234567890123456789L;
    SolrInputDocument added = processAdd("add-fields",
        doc(f("id", "5"),
            f(fieldName1, field1Value1, field1Value2, field1Value3),
            f(fieldName2, field2Value1, field2Value2)));
    assertNotNull(added);
    // The processor chain should have created both fields with promoted types.
    IndexSchema schemaAfter = h.getCore().getLatestSchema();
    assertNotNull(schemaAfter.getFieldOrNull(fieldName1));
    assertNotNull(schemaAfter.getFieldOrNull(fieldName2));
    assertEquals("tdouble", schemaAfter.getFieldType(fieldName1).getTypeName());
    assertEquals("tlong", schemaAfter.getFieldType(fieldName2).getTypeName());
    assertU(commit());
    // Every value must come back under its field, coerced to the promoted type.
    assertQ(req("id:5"), "//arr[@name='" + fieldName1 + "']/double[.='" + field1Value1.toString() + "']", "//arr[@name='" + fieldName1 + "']/double[.='" + field1Value2.toString() + "']", "//arr[@name='" + fieldName1 + "']/double[.='" + field1Value3.doubleValue() + "']", "//arr[@name='" + fieldName2 + "']/long[.='" + field2Value1.toString() + "']", "//arr[@name='" + fieldName2 + "']/long[.='" + field2Value2.toString() + "']");
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) IndexSchema(org.apache.solr.schema.IndexSchema)

Aggregations

IndexSchema (org.apache.solr.schema.IndexSchema)116 SolrInputDocument (org.apache.solr.common.SolrInputDocument)42 SchemaField (org.apache.solr.schema.SchemaField)34 HashMap (java.util.HashMap)16 SolrException (org.apache.solr.common.SolrException)15 IOException (java.io.IOException)14 FieldType (org.apache.solr.schema.FieldType)14 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)13 Date (java.util.Date)12 LinkedHashMap (java.util.LinkedHashMap)12 NamedList (org.apache.solr.common.util.NamedList)12 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)12 ArrayList (java.util.ArrayList)11 Document (org.apache.lucene.document.Document)11 SolrParams (org.apache.solr.common.params.SolrParams)11 DateTime (org.joda.time.DateTime)10 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)9 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)9 SolrConfig (org.apache.solr.core.SolrConfig)8 Test (org.junit.Test)7