Search in sources :

Example 16 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class AnalysisRequestHandlerBase method convertTokensToNamedLists.

/**
 * Converts the list of Tokens to a list of NamedLists representing the tokens.
 *
 * @param tokenList  Tokens to convert
 * @param context The analysis context
 * @return List of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
    // sort the tokens by absolute position
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {

        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(), b.getAttribute(TokenTrackingAttribute.class).getPositions());

        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
            // One is a prefix of the other, or, they are equal:
            return a.length - b.length;
    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
        tokenNamedList.add("text", text);
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
        tokenNamedList.add("raw_bytes", rawBytes.toString());
        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        token.reflectWith(new AttributeReflector() {

            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out position and bytes term
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                String k = attClass.getName() + '#' + key;
                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                tokenNamedList.add(k, value);
    return tokensNamedLists;
Also used : AttributeSource(org.apache.lucene.util.AttributeSource) NamedList(org.apache.solr.common.util.NamedList) AttributeReflector(org.apache.lucene.util.AttributeReflector) ArrayList(java.util.ArrayList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 17 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class DocumentAnalysisRequestHandler method handleAnalysisRequest.

/**
 * Handles the resolved {@link DocumentAnalysisRequest} and returns the analysis response as a named list.
 *
 * @param request The {@link DocumentAnalysisRequest} to be handled.
 * @param schema  The index schema.
 * @return The analysis response as a named list.
 */
NamedList<Object> handleAnalysisRequest(DocumentAnalysisRequest request, IndexSchema schema) {
    SchemaField uniqueKeyField = schema.getUniqueKeyField();
    NamedList<Object> result = new SimpleOrderedMap<>();
    for (SolrInputDocument document : request.getDocuments()) {
        NamedList<NamedList> theTokens = new SimpleOrderedMap<>();
        result.add(document.getFieldValue(uniqueKeyField.getName()).toString(), theTokens);
        for (String name : document.getFieldNames()) {
            // there's no point of providing analysis to unindexed fields.
            SchemaField field = schema.getField(name);
            if (!field.indexed()) {
            NamedList<Object> fieldTokens = new SimpleOrderedMap<>();
            theTokens.add(name, fieldTokens);
            FieldType fieldType = schema.getFieldType(name);
            final String queryValue = request.getQuery();
            Set<BytesRef> termsToMatch;
            try {
                termsToMatch = (queryValue != null && request.isShowMatch()) ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) : EMPTY_BYTES_SET;
            } catch (Exception e) {
                // ignore analysis exceptions since we are applying arbitrary text to all fields
                termsToMatch = EMPTY_BYTES_SET;
            if (request.getQuery() != null) {
                try {
                    AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
                    fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
                } catch (Exception e) {
                // ignore analysis exceptions since we are applying arbitrary text to all fields
            Analyzer analyzer = fieldType.getIndexAnalyzer();
            AnalysisContext analysisContext = new AnalysisContext(fieldType, analyzer, termsToMatch);
            Collection<Object> fieldValues = document.getFieldValues(name);
            NamedList<NamedList<? extends Object>> indexTokens = new SimpleOrderedMap<>();
            for (Object fieldValue : fieldValues) {
                indexTokens.add(String.valueOf(fieldValue), analyzeValue(fieldValue.toString(), analysisContext));
            fieldTokens.add("index", indexTokens);
    return result;
Also used : NamedList(org.apache.solr.common.util.NamedList) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrException(org.apache.solr.common.SolrException) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) SolrInputDocument(org.apache.solr.common.SolrInputDocument) BytesRef(org.apache.lucene.util.BytesRef)

Example 18 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class BlobHandler method handleRequestBody.

public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    String httpMethod = req.getHttpMethod();
    String path = (String) req.getContext().get("path");
    SolrConfigHandler.setWt(req, JSON);
    List<String> pieces = StrUtils.splitSmart(path, '/');
    String blobName = null;
    if (pieces.size() >= 3)
        blobName = pieces.get(2);
    if ("POST".equals(httpMethod)) {
        if (blobName == null || blobName.isEmpty()) {
            rsp.add("error", "Name not found");
        String err = SolrConfigHandler.validateName(blobName);
        if (err != null) {
            log.warn("no blob name");
            rsp.add("error", err);
        if (req.getContentStreams() == null) {
            log.warn("no content stream");
            rsp.add("error", "No stream");
        for (ContentStream stream : req.getContentStreams()) {
            ByteBuffer payload = SimplePostTool.inputStreamToByteArray(stream.getStream(), maxSize);
            MessageDigest m = MessageDigest.getInstance("MD5");
            m.update(payload.array(), payload.position(), payload.limit());
            String md5 = new BigInteger(1, m.digest()).toString(16);
            TopDocs duplicate = req.getSearcher().search(new TermQuery(new Term("md5", md5)), 1);
            if (duplicate.totalHits > 0) {
                rsp.add("error", "duplicate entry");
                forward(req, null, new MapSolrParams((Map) makeMap("q", "md5:" + md5, "fl", "id,size,version,timestamp,blobName")), rsp);
                log.warn("duplicate entry for blob :" + blobName);
            TopFieldDocs docs = req.getSearcher().search(new TermQuery(new Term("blobName", blobName)), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
            long version = 0;
            if (docs.totalHits > 0) {
                Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                Number n = doc.getField("version").numericValue();
                version = n.longValue();
            String id = blobName + "/" + version;
            Map<String, Object> doc = makeMap(ID, id, "md5", md5, "blobName", blobName, VERSION, version, "timestamp", new Date(), "size", payload.limit(), "blob", payload);
            verifyWithRealtimeGet(blobName, version, req, doc);
  "inserting new blob {0} ,size {1}, md5 {2}", doc.get(ID), String.valueOf(payload.limit()), md5));
            indexMap(req, rsp, doc);
  " Successfully Added and committed a blob with id {} and size {} ", id, payload.limit());
    } else {
        int version = -1;
        if (pieces.size() > 3) {
            try {
                version = Integer.parseInt(pieces.get(3));
            } catch (NumberFormatException e) {
                rsp.add("error", "Invalid version" + pieces.get(3));
        if (ReplicationHandler.FILE_STREAM.equals(req.getParams().get(CommonParams.WT))) {
            if (blobName == null) {
                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Please send the request in the format /blob/<blobName>/<version>");
            } else {
                String q = "blobName:{0}";
                if (version != -1)
                    q = "id:{0}/{1}";
                QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
                final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
                if (docs.totalHits > 0) {
                    rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

                        public void write(OutputStream os) throws IOException {
                            Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                            IndexableField sf = doc.getField("blob");
                            FieldType fieldType = req.getSchema().getField("blob").getType();
                            ByteBuffer buf = (ByteBuffer) fieldType.toObject(sf);
                            if (buf == null) {
                                //should never happen unless a user wrote this document directly
                                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Invalid document . No field called blob");
                            } else {
                                os.write(buf.array(), 0, buf.limit());
                } else {
                    throw new SolrException(SolrException.ErrorCode.NOT_FOUND, StrUtils.formatString("Invalid combination of blobName {0} and version {1}", blobName, version));
        } else {
            String q = "*:*";
            if (blobName != null) {
                q = "blobName:{0}";
                if (version != -1) {
                    q = "id:{0}/{1}";
            forward(req, null, new MapSolrParams((Map) makeMap("q", StrUtils.formatString(q, blobName, version), "fl", "id,size,version,timestamp,blobName,md5", SORT, "version desc")), rsp);
Also used : SolrCore(org.apache.solr.core.SolrCore) OutputStream(java.io.OutputStream) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) TopDocs(org.apache.lucene.search.TopDocs) ContentStream(org.apache.solr.common.util.ContentStream) Sort(org.apache.lucene.search.Sort) MessageDigest(java.security.MessageDigest) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Date(java.util.Date) FieldType(org.apache.solr.schema.FieldType) IndexableField(org.apache.lucene.index.IndexableField) MapSolrParams(org.apache.solr.common.params.MapSolrParams) QParser(org.apache.solr.search.QParser) BigInteger(java.math.BigInteger) Map(java.util.Map) Utils.makeMap(org.apache.solr.common.util.Utils.makeMap) Collections.singletonMap(java.util.Collections.singletonMap)

Example 19 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class TermsComponent method fetchTerms.

private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList, boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
    String[] splitTerms = termList.split(",");
    for (int i = 0; i < splitTerms.length; i++) {
        splitTerms[i] = splitTerms[i].trim();
    // Sort the terms once
    IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
    for (String field : fields) {
        FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
        // Since splitTerms is already sorted, this array will also be sorted
        Term[] terms = new Term[splitTerms.length];
        for (int i = 0; i < splitTerms.length; i++) {
            terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
        TermContext[] termContexts = new TermContext[terms.length];
        collectTermContext(topReaderContext, termContexts, terms);
        NamedList<Object> termsMap = new SimpleOrderedMap<>();
        for (int i = 0; i < terms.length; i++) {
            if (termContexts[i] != null) {
                String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
                int docFreq = termContexts[i].docFreq();
                if (!includeTotalTermFreq) {
                    termsMap.add(outTerm, docFreq);
                } else {
                    long totalTermFreq = termContexts[i].totalTermFreq();
                    NamedList<Long> termStats = new SimpleOrderedMap<>();
                    termStats.add("df", (long) docFreq);
                    termStats.add("ttf", totalTermFreq);
                    termsMap.add(outTerm, termStats);
        result.add(field, termsMap);
Also used : SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType)

Example 20 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class SimpleFacets method getListedTermCounts.

/**
 * Computes the term-&gt;count counts for the specified term values relative to the
 * docset carried by {@code parsed}.
 *
 * @param field the name of the field to compute term counts against
 * @param parsed contains the docset to compute term counts relative to
 * @param terms a list of term values (in the specified field) to compute the counts for
 * @return one (term, count) entry per requested term, in the order given
 */
protected NamedList<Integer> getListedTermCounts(String field, final ParsedParams parsed, List<String> terms) throws IOException {
    SchemaField sf = searcher.getSchema().getField(field);
    FieldType ft = sf.getType();
    NamedList<Integer> res = new NamedList<>();
    for (String term : terms) {
        int count = searcher.numDocs(ft.getFieldQuery(null, sf, term),;
        res.add(term, count);
    return res;
Also used : SchemaField(org.apache.solr.schema.SchemaField) NamedList(org.apache.solr.common.util.NamedList) FieldType(org.apache.solr.schema.FieldType)


FieldType (org.apache.solr.schema.FieldType)93 SchemaField (org.apache.solr.schema.SchemaField)37 SolrException (org.apache.solr.common.SolrException)29 ArrayList (java.util.ArrayList)23 BytesRef (org.apache.lucene.util.BytesRef)23 NamedList (org.apache.solr.common.util.NamedList)23 IOException ( SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)15 IndexSchema (org.apache.solr.schema.IndexSchema)14 Query ( BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)13 Analyzer (org.apache.lucene.analysis.Analyzer)12 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)10 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)10 StrField (org.apache.solr.schema.StrField)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 DocIterator ( DocList (