Search in sources :

Example 16 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class XmlSitemapReader method parseSitemapUrlNode.

/**
 * Builds one {@link Document} from the given node list and hands it to the
 * executer.
 *
 * <p>Every element node in {@code url} becomes a field: the field name is the
 * element name and the field value is the node value of the element's first
 * child. Elements without any child node (for example an empty
 * {@code <loc/>}) are skipped, because there is no value to read and
 * dereferencing the missing child would throw a
 * {@link NullPointerException}.
 *
 * @param url the nodes to convert; non-element nodes are ignored
 * @param sitemapIndexUrl stored in the field "sitemapIndexUrl" unless it is
 *        null or empty
 */
void parseSitemapUrlNode(NodeList url, String sitemapIndexUrl) {
    Document document = new Document();
    if (!Strings.isNullOrEmpty(sitemapIndexUrl)) {
        document.addField("sitemapIndexUrl", sitemapIndexUrl);
    }
    for (int k = 0; k < url.getLength(); k++) {
        Node noder = url.item(k);
        if (Node.ELEMENT_NODE != noder.getNodeType()) {
            continue;
        }
        // getFirstChild() returns null for an element that has no children.
        Node firstChild = noder.getFirstChild();
        if (firstChild == null) {
            continue;
        }
        document.addField(noder.getNodeName(), firstChild.getNodeValue());
    }
    executer.document(document);
}
Also used : Node(org.w3c.dom.Node) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 17 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class TokenizerFilter method document.

/**
 * Splits every value of every field of the incoming document into tokens,
 * using {@code delim} as the delimiter set, and forwards each token to
 * {@code super.document} as a new document holding a single "token" field.
 *
 * @param document the document whose field values are tokenized
 */
@Override
public void document(Document document) {
    for (Field sourceField : document.getFields()) {
        for (String fieldValue : sourceField.getValues()) {
            StringTokenizer parts = new StringTokenizer(fieldValue, delim);
            while (parts.hasMoreTokens()) {
                // One output document per token.
                Document tokenDocument = new Document();
                tokenDocument.addField("token", parts.nextToken());
                super.document(tokenDocument);
            }
        }
    }
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) StringTokenizer(java.util.StringTokenizer) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 18 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class TopicAggregationFilter method end.

/**
 * Forwards every document held in {@code docs} to {@code super.document}
 * and then calls {@code super.end()}.
 */
@Override
public void end() {
    // Only the stored documents are needed here, not their keys.
    for (Document aggregated : docs.values()) {
        super.document(aggregated);
    }
    super.end();
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document)

Example 19 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class TopicAggregationFilter method mergeDocuments.

/**
 * Merges two documents into a new one.
 *
 * <p>The result has one field per distinct field name found in either
 * input; its values are the de-duplicated union of all values stored under
 * that name in both documents. Both inputs are handled the same way, so a
 * field name that occurs more than once inside {@code doc1} contributes all
 * of its values instead of only the last occurrence.
 *
 * <p>Values are collected in hash-based collections, so the order of fields
 * and of values in the result is unspecified.
 *
 * @param doc1 the first document to merge
 * @param doc2 the second document to merge
 * @return a new document containing the merged fields
 */
protected Document mergeDocuments(Document doc1, Document doc2) {
    Map<String, HashSet<String>> mergedFields = new HashMap<String, HashSet<String>>();
    for (Document source : new Document[] { doc1, doc2 }) {
        for (Field field : source.getFields()) {
            HashSet<String> values = mergedFields.get(field.getName());
            if (values == null) {
                values = new HashSet<String>();
                mergedFields.put(field.getName(), values);
            }
            values.addAll(field.getValues());
        }
    }
    Document mergedDoc = new Document();
    for (Map.Entry<String, HashSet<String>> entry : mergedFields.entrySet()) {
        mergedDoc.addField(new Field(entry.getKey(), new ArrayList<String>(entry.getValue())));
    }
    return mergedDoc;
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 20 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class TopicMergeFilter method document.

/**
 * Stores the incoming document in {@code topicValues}, grouped first by the
 * value of the {@code fieldTopic} field and then by the value of the
 * {@code fieldValue} field.
 *
 * <p>When {@code fieldValueLowercase} is set, the value is lower-cased
 * before it is used as key. If an entry already exists for the same topic
 * and value, the stored document is replaced by the result of
 * {@code mergeDocuments(oldDoc, document)}. This method only stores the
 * document; it does not forward it.
 *
 * @param document the document to add or merge
 */
@Override
public void document(Document document) {
    String topic = document.getFieldValue(fieldTopic);
    String value = document.getFieldValue(fieldValue);
    // Guard against a missing value so lower-casing cannot throw a
    // NullPointerException; a null value is then used as key exactly as it
    // is when lower-casing is disabled.
    // NOTE(review): presumably getFieldValue returns null for an absent field - confirm.
    if (fieldValueLowercase && value != null) {
        value = value.toLowerCase();
    }
    if (!topicValues.containsKey(topic)) {
        topicValues.put(topic, new HashMap<String, Document>());
    }
    if (topicValues.get(topic).containsKey(value)) {
        // merge with the document already stored for this topic/value pair
        Document oldDoc = topicValues.get(topic).get(value);
        Document mergedDoc = mergeDocuments(oldDoc, document);
        topicValues.get(topic).put(value, mergedDoc);
    } else {
        // first document for this topic/value pair
        topicValues.get(topic).put(value, document);
    }
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document)

Aggregations

Document (de.tblsoft.solr.pipeline.bean.Document) 51, ArrayList (java.util.ArrayList) 9, Map (java.util.Map) 9, Test (org.junit.Test) 9, Field (de.tblsoft.solr.pipeline.bean.Field) 8, DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff) 4, AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest) 4, GsonBuilder (com.google.gson.GsonBuilder) 3, Match (oi.thekraken.grok.api.Match) 3, AtomicLongMap (com.google.common.util.concurrent.AtomicLongMap) 2, Gson (com.google.gson.Gson) 2, JsonElement (com.google.gson.JsonElement) 2, DocumentContext (com.jayway.jsonpath.DocumentContext) 2, PathNotFoundException (com.jayway.jsonpath.PathNotFoundException) 2, DocumentBuilder (de.tblsoft.solr.pipeline.bean.DocumentBuilder) 2, SimpleMapping (de.tblsoft.solr.pipeline.filter.SimpleMapping) 2, File (java.io.File) 2, IOException (java.io.IOException) 2, InputStream (java.io.InputStream) 2, InputStreamReader (java.io.InputStreamReader) 2