Search in sources :

Example 16 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class AnalysisRequestHandlerBase method convertTokensToNamedLists.

/**
   * Converts the list of Tokens to a list of NamedLists representing the tokens.
   *
   * <p>The tokens are first sorted by the position arrays exposed through their
   * {@code TokenTrackingAttribute}, compared element by element; when one array is a prefix
   * of the other, the shorter one sorts first. Each token is then rendered as an ordered map
   * holding its readable text, optionally its raw text (only when it differs from the readable
   * text), its raw bytes, a {@code match} flag when the raw bytes are among the context's terms
   * to match, and every remaining reflected attribute value.
   *
   * @param tokenList  Tokens to convert
   * @param context The analysis context
   *
   * @return List of NamedLists containing the relevant information taken from the tokens
   */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
    // sort the tokens by absolute position
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {

        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(), b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        // Lexicographic comparison of two position arrays.
        private int arrayCompare(int[] a, int[] b) {
            final int stop = Math.min(a.length, b.length);
            for (int p = 0; p < stop; p++) {
                // Integer.compare rather than subtraction: a difference of two ints can overflow.
                final int diff = Integer.compare(a[p], b[p]);
                if (diff != 0) {
                    return diff;
                }
            }
            // One is a prefix of the other, or, they are equal:
            return Integer.compare(a.length, b.length);
        }
    });
    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        // Human-readable form of the indexed bytes, as defined by the field type.
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
        tokenNamedList.add("text", text);
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            // Only report the raw text when it adds information beyond "text".
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }
        tokenNamedList.add("raw_bytes", rawBytes.toString());
        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }
        token.reflectWith(new AttributeReflector() {

            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out the term attributes (already reported above) and the position increment
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) {
                    return;
                }
                if (CharTermAttribute.class.isAssignableFrom(attClass)) {
                    return;
                }
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) {
                    return;
                }
                String k = attClass.getName() + '#' + key;
                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }
                // BytesRef values are reported through their string form.
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }
                tokenNamedList.add(k, value);
            }
        });
        tokensNamedLists.add(tokenNamedList);
    }
    return tokensNamedLists;
}
Also used : AttributeSource(org.apache.lucene.util.AttributeSource) NamedList(org.apache.solr.common.util.NamedList) AttributeReflector(org.apache.lucene.util.AttributeReflector) ArrayList(java.util.ArrayList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 17 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class DocumentAnalysisRequestHandler method handleAnalysisRequest.

/**
   * Analyzes every indexed field of every document in the given {@link DocumentAnalysisRequest}
   * and returns the outcome as a named list.
   *
   * <p>The result holds one entry per document, keyed by the string form of the document's
   * unique key field value. Under it there is one entry per indexed field, containing an
   * optional {@code "query"} analysis (present only when the request has a query and analyzing
   * it succeeded) and an {@code "index"} analysis with one entry per field value.
   *
   * @param request The {@link DocumentAnalysisRequest} to be handled.
   * @param schema  The index schema.
   *
   * @return The analysis response as a named list.
   */
NamedList<Object> handleAnalysisRequest(DocumentAnalysisRequest request, IndexSchema schema) {
    final SchemaField keyField = schema.getUniqueKeyField();
    final NamedList<Object> response = new SimpleOrderedMap<>();
    for (SolrInputDocument doc : request.getDocuments()) {
        final NamedList<NamedList> docAnalysis = new SimpleOrderedMap<>();
        response.add(doc.getFieldValue(keyField.getName()).toString(), docAnalysis);
        for (String fieldName : doc.getFieldNames()) {
            // Unindexed fields are skipped: there is nothing to analyze for them.
            if (!schema.getField(fieldName).indexed()) {
                continue;
            }
            final NamedList<Object> fieldAnalysis = new SimpleOrderedMap<>();
            docAnalysis.add(fieldName, fieldAnalysis);
            final FieldType type = schema.getFieldType(fieldName);
            final String queryText = request.getQuery();

            // Terms to highlight as matches; stays empty unless a query is present and
            // match display was requested.
            Set<BytesRef> matchTerms = EMPTY_BYTES_SET;
            try {
                if (queryText != null && request.isShowMatch()) {
                    matchTerms = getQueryTokenSet(queryText, type.getQueryAnalyzer());
                }
            } catch (Exception e) {
                // The query text is applied to every field regardless of its type, so an
                // analysis failure here is expected and simply means "no match terms".
                matchTerms = EMPTY_BYTES_SET;
            }

            if (request.getQuery() != null) {
                try {
                    final AnalysisContext queryContext = new AnalysisContext(type, type.getQueryAnalyzer(), EMPTY_BYTES_SET);
                    fieldAnalysis.add("query", analyzeValue(request.getQuery(), queryContext));
                } catch (Exception e) {
                    // Same reasoning as above: the arbitrary query text may not be analyzable
                    // for this field, in which case the "query" entry is left out.
                }
            }

            final AnalysisContext indexContext = new AnalysisContext(type, type.getIndexAnalyzer(), matchTerms);
            final Collection<Object> values = doc.getFieldValues(fieldName);
            final NamedList<NamedList<? extends Object>> indexAnalysis = new SimpleOrderedMap<>();
            for (Object value : values) {
                indexAnalysis.add(String.valueOf(value), analyzeValue(value.toString(), indexContext));
            }
            fieldAnalysis.add("index", indexAnalysis);
        }
    }
    return response;
}
Also used : NamedList(org.apache.solr.common.util.NamedList) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrException(org.apache.solr.common.SolrException) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) SolrInputDocument(org.apache.solr.common.SolrInputDocument) BytesRef(org.apache.lucene.util.BytesRef)

Example 18 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class BlobHandler method handleRequestBody.

/**
 * Handles blob requests.
 *
 * <p>The blob name and optional version are taken from the third and fourth elements of the
 * request path after splitting it on {@code '/'}.
 *
 * <p>For {@code POST}: validates the blob name, reads the first content stream, rejects it when
 * a document with the same MD5 already exists, and otherwise indexes it under the id
 * {@code blobName/version}, where the version is one more than the highest version found for
 * that blob name.
 *
 * <p>For any other HTTP method: when the {@code wt} parameter equals
 * {@code ReplicationHandler.FILE_STREAM} the raw blob bytes are streamed back; otherwise the
 * request is forwarded as a metadata query over all blobs, one blob name, or one specific
 * blob version.
 *
 * @param req the request; its context must carry the request path under {@code "path"}
 * @param rsp the response that receives either an {@code "error"} entry or the result
 * @throws Exception on any failure while reading, searching, or indexing
 */
@Override
public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    String httpMethod = req.getHttpMethod();
    String path = (String) req.getContext().get("path");
    SolrConfigHandler.setWt(req, JSON);
    List<String> pieces = StrUtils.splitSmart(path, '/');
    String blobName = null;
    if (pieces.size() >= 3) {
        blobName = pieces.get(2);
    }
    if ("POST".equals(httpMethod)) {
        if (blobName == null || blobName.isEmpty()) {
            rsp.add("error", "Name not found");
            return;
        }
        String err = SolrConfigHandler.validateName(blobName);
        if (err != null) {
            // The name is present at this point; it failed validation.
            log.warn("invalid blob name: {}", err);
            rsp.add("error", err);
            return;
        }
        if (req.getContentStreams() == null) {
            log.warn("no content stream");
            rsp.add("error", "No stream");
            return;
        }
        for (ContentStream stream : req.getContentStreams()) {
            ByteBuffer payload = SimplePostTool.inputStreamToByteArray(stream.getStream(), maxSize);
            MessageDigest m = MessageDigest.getInstance("MD5");
            // MessageDigest.update takes (array, offset, length): hash exactly the readable
            // window of the buffer, honouring both its array offset and its position.
            m.update(payload.array(), payload.arrayOffset() + payload.position(), payload.remaining());
            // NOTE(review): BigInteger.toString(16) drops leading zeros, so this is not always
            // a 32-character hex digest. Left unchanged because already-indexed md5 values
            // would otherwise stop matching in the duplicate check below.
            String md5 = new BigInteger(1, m.digest()).toString(16);
            TopDocs duplicate = req.getSearcher().search(new TermQuery(new Term("md5", md5)), 1);
            if (duplicate.totalHits > 0) {
                rsp.add("error", "duplicate entry");
                forward(req, null, new MapSolrParams((Map) makeMap("q", "md5:" + md5, "fl", "id,size,version,timestamp,blobName")), rsp);
                log.warn("duplicate entry for blob :" + blobName);
                return;
            }
            // Highest existing version for this blob name (sorted by version, descending).
            TopFieldDocs docs = req.getSearcher().search(new TermQuery(new Term("blobName", blobName)), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
            long version = 0;
            if (docs.totalHits > 0) {
                Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                Number n = doc.getField("version").numericValue();
                version = n.longValue();
            }
            version++;
            String id = blobName + "/" + version;
            Map<String, Object> doc = makeMap(ID, id, "md5", md5, "blobName", blobName, VERSION, version, "timestamp", new Date(), "size", payload.limit(), "blob", payload);
            verifyWithRealtimeGet(blobName, version, req, doc);
            log.info(StrUtils.formatString("inserting new blob {0} ,size {1}, md5 {2}", doc.get(ID), String.valueOf(payload.limit()), md5));
            indexMap(req, rsp, doc);
            log.info(" Successfully Added and committed a blob with id {} and size {} ", id, payload.limit());
            // Only the first content stream is stored.
            break;
        }
    } else {
        // -1 means "no version requested".
        int version = -1;
        if (pieces.size() > 3) {
            try {
                version = Integer.parseInt(pieces.get(3));
            } catch (NumberFormatException e) {
                rsp.add("error", "Invalid version " + pieces.get(3));
                return;
            }
        }
        if (ReplicationHandler.FILE_STREAM.equals(req.getParams().get(CommonParams.WT))) {
            if (blobName == null) {
                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Please send the request in the format /blob/<blobName>/<version>");
            } else {
                // NOTE(review): blobName comes from the request path and is interpolated into
                // the query string unescaped - confirm this is acceptable.
                String q = "blobName:{0}";
                if (version != -1) {
                    q = "id:{0}/{1}";
                }
                QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
                final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
                if (docs.totalHits > 0) {
                    rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

                        @Override
                        public void write(OutputStream os) throws IOException {
                            Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                            IndexableField sf = doc.getField("blob");
                            FieldType fieldType = req.getSchema().getField("blob").getType();
                            ByteBuffer buf = (ByteBuffer) fieldType.toObject(sf);
                            if (buf == null) {
                                //should never happen unless a user wrote this document directly
                                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Invalid document . No field called blob");
                            } else {
                                // Write only the buffer's readable window; the backing array
                                // may be larger than, or offset from, the blob content.
                                os.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
                            }
                        }
                    });
                } else {
                    throw new SolrException(SolrException.ErrorCode.NOT_FOUND, StrUtils.formatString("Invalid combination of blobName {0} and version {1}", blobName, version));
                }
            }
        } else {
            // Metadata listing: all blobs, one blob name, or one specific version.
            String q = "*:*";
            if (blobName != null) {
                q = "blobName:{0}";
                if (version != -1) {
                    q = "id:{0}/{1}";
                }
            }
            forward(req, null, new MapSolrParams((Map) makeMap("q", StrUtils.formatString(q, blobName, version), "fl", "id,size,version,timestamp,blobName,md5", SORT, "version desc")), rsp);
        }
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) OutputStream(java.io.OutputStream) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) TopDocs(org.apache.lucene.search.TopDocs) ContentStream(org.apache.solr.common.util.ContentStream) Sort(org.apache.lucene.search.Sort) MessageDigest(java.security.MessageDigest) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Date(java.util.Date) FieldType(org.apache.solr.schema.FieldType) IndexableField(org.apache.lucene.index.IndexableField) MapSolrParams(org.apache.solr.common.params.MapSolrParams) QParser(org.apache.solr.search.QParser) BigInteger(java.math.BigInteger) Map(java.util.Map) Utils.makeMap(org.apache.solr.common.util.Utils.makeMap) Collections.singletonMap(java.util.Collections.singletonMap)

Example 19 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class TermsComponent method fetchTerms.

/**
 * Looks up statistics for an explicit, comma-separated list of terms in each of the given
 * fields and adds one entry per field to {@code result}.
 *
 * <p>Each field entry maps the readable form of every term that was found to either its
 * document frequency, or, when {@code includeTotalTermFreq} is set, to a nested list holding
 * {@code "df"} and {@code "ttf"}. Terms for which no term context was collected are omitted.
 *
 * @param indexSearcher        searcher providing the schema and the top-level reader context
 * @param fields               names of the fields to look the terms up in
 * @param termList             comma-separated term values; each one is trimmed
 * @param includeTotalTermFreq whether to report total term frequency alongside document frequency
 * @param result               receives one entry per field
 * @throws IOException declared by this method; presumably raised while collecting term contexts
 */
private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList, boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
    final String[] sortedTerms = termList.split(",");
    final int termCount = sortedTerms.length;
    for (int t = 0; t < termCount; t++) {
        sortedTerms[t] = sortedTerms[t].trim();
    }
    // Sorted a single time up front and reused for every field.
    Arrays.sort(sortedTerms);

    final IndexReaderContext readerContext = indexSearcher.getTopReaderContext();
    for (String field : fields) {
        final FieldType type = indexSearcher.getSchema().getField(field).getType();

        // Built in the same order as the sorted input terms.
        final Term[] lookups = new Term[termCount];
        for (int t = 0; t < termCount; t++) {
            lookups[t] = new Term(field, type.readableToIndexed(sortedTerms[t]));
        }

        final TermContext[] contexts = new TermContext[lookups.length];
        collectTermContext(readerContext, contexts, lookups);

        final NamedList<Object> fieldStats = new SimpleOrderedMap<>();
        for (int t = 0; t < lookups.length; t++) {
            final TermContext termContext = contexts[t];
            if (termContext == null) {
                // No context was collected for this term; leave it out of the response.
                continue;
            }
            final String readable = type.indexedToReadable(lookups[t].bytes().utf8ToString());
            final int docFreq = termContext.docFreq();
            if (includeTotalTermFreq) {
                final long totalTermFreq = termContext.totalTermFreq();
                final NamedList<Long> dfAndTtf = new SimpleOrderedMap<>();
                dfAndTtf.add("df", (long) docFreq);
                dfAndTtf.add("ttf", totalTermFreq);
                fieldStats.add(readable, dfAndTtf);
            } else {
                fieldStats.add(readable, docFreq);
            }
        }
        result.add(field, fieldStats);
    }
}
Also used : SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType)

Example 20 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class SimpleFacets method getListedTermCounts.

/**
   * Computes the term-&gt;count counts for the specified term values, relative to the
   * docset carried by {@code parsed}.
   *
   * @param field the name of the field to compute term counts against
   * @param parsed contains the docset to compute term counts relative to
   * @param terms a list of term values (in the specified field) to compute the counts for
   * @return one entry per given term, in the given order, mapping the term to its count
   * @throws IOException declared by this method; presumably raised while counting documents
   */
protected NamedList<Integer> getListedTermCounts(String field, final ParsedParams parsed, List<String> terms) throws IOException {
    final SchemaField schemaField = searcher.getSchema().getField(field);
    final FieldType fieldType = schemaField.getType();
    final NamedList<Integer> counts = new NamedList<>();
    for (String termValue : terms) {
        // Count the documents in the parsed docset that match the field query for this value.
        counts.add(termValue, searcher.numDocs(fieldType.getFieldQuery(null, schemaField, termValue), parsed.docs));
    }
    return counts;
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) NamedList(org.apache.solr.common.util.NamedList) FieldType(org.apache.solr.schema.FieldType)

Aggregations

FieldType (org.apache.solr.schema.FieldType)93 SchemaField (org.apache.solr.schema.SchemaField)37 SolrException (org.apache.solr.common.SolrException)29 ArrayList (java.util.ArrayList)23 BytesRef (org.apache.lucene.util.BytesRef)23 NamedList (org.apache.solr.common.util.NamedList)23 IOException (java.io.IOException)18 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)15 IndexSchema (org.apache.solr.schema.IndexSchema)14 Query (org.apache.lucene.search.Query)13 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)13 Analyzer (org.apache.lucene.analysis.Analyzer)12 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)10 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)10 StrField (org.apache.solr.schema.StrField)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 DocIterator (org.apache.solr.search.DocIterator)7 DocList (org.apache.solr.search.DocList)7