Search in sources :

Example 36 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class DictionaryNormalizationFilter method end.

/**
 * Emits every collected token as its own document, in sorted order, and then
 * finishes the stage.
 *
 * <p>The tokens are copied into a new list before sorting, so the {@code tokens}
 * collection itself is not reordered here. Each emitted document carries a single
 * field named {@code "token"}.
 */
@Override
public void end() {
    List<String> ordered = new ArrayList<String>(tokens);
    Collections.sort(ordered);
    for (int i = 0; i < ordered.size(); i++) {
        Document tokenDocument = new Document();
        tokenDocument.setField("token", ordered.get(i));
        // Hand the document to the parent implementation, as the original did.
        super.document(tokenDocument);
    }
    super.end();
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document)

Example 37 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class ElasticWriter method procesBuffer.

/**
 * Sends the documents currently held in {@code buffer} to the bulk endpoint
 * derived from {@code location} as a single bulk request.
 *
 * <p>For each document an "index" action line (built by {@code createBulkMethod})
 * and the document's JSON are appended, each terminated by {@code " \n"}.
 * Documents whose JSON mapping is empty are skipped. The id is a random UUID when
 * {@code idField} is empty according to {@code Strings.isStringEmpty}; otherwise it
 * is the value of that field on the document.
 *
 * <p>If no document contributes to the request (empty buffer, or every document
 * maps to empty JSON), nothing is posted: a bulk call without any action is not a
 * valid request.
 *
 * <p>This method does not clear {@code buffer}.
 *
 * @throws RuntimeException wrapping a {@link URISyntaxException} raised while
 *         resolving index, type or bulk URL from {@code location}
 */
void procesBuffer() {
    try {
        // Index and type depend only on the configured location, so resolve them once.
        String index = ElasticHelper.getIndexFromUrl(location);
        String type = ElasticHelper.getTypeFromUrl(location);

        StringBuilder bulkRequest = new StringBuilder();
        for (Document document : buffer) {
            Map<String, Object> jsonDocument = mapToJson(document, detectNumberValues);
            if (jsonDocument.isEmpty()) {
                continue;
            }
            String id;
            if (Strings.isStringEmpty(idField)) {
                id = UUID.randomUUID().toString();
            } else {
                id = document.getFieldValue(idField);
            }
            String bulkMethod = createBulkMethod("index", index, type, id);
            String json = gson.toJson(jsonDocument);
            bulkRequest.append(bulkMethod).append(" \n");
            bulkRequest.append(json).append(" \n");
        }

        // Avoid posting an empty body when there is nothing to index.
        if (bulkRequest.length() == 0) {
            return;
        }

        String bulkUrl = ElasticHelper.getBulkUrl(location);
        HTTPHelper.post(bulkUrl, bulkRequest.toString(), "application/json");
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
}
Also used : URISyntaxException(java.net.URISyntaxException) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 38 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class ElasticWriter method mapToJsonString.

/**
 * Renders a list of documents as one pretty-printed JSON string.
 *
 * <p>Each document is converted with {@code mapToJson(document, detectNumberValues)}
 * and the resulting maps are serialized together, in input order, as a single
 * JSON value.
 *
 * @param documentList the documents to serialize
 * @param detectNumberValues passed through unchanged to {@code mapToJson}
 * @return the pretty-printed JSON text
 */
public static String mapToJsonString(List<Document> documentList, boolean detectNumberValues) {
    Gson prettyGson = new GsonBuilder().setPrettyPrinting().create();

    List<Map<String, Object>> jsonDocuments = new ArrayList<Map<String, Object>>();
    for (Document current : documentList) {
        Map<String, Object> asJson = mapToJson(current, detectNumberValues);
        jsonDocuments.add(asJson);
    }

    return prettyGson.toJson(jsonDocuments);
}
Also used : GsonBuilder(com.google.gson.GsonBuilder) Gson(com.google.gson.Gson) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 39 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class AggregationCountFilter method end.

/**
 * Emits the accumulated counts as documents and then finishes the stage.
 *
 * <p>One document is produced for every (outer key, counted value) pair found in
 * {@code aggregation}. Each document has three fields: {@code "value"} (the counted
 * value), {@code "count"} (its count) and {@code "type"} (the outer key of
 * {@code aggregation} under which the count was stored).
 */
@Override
public void end() {
    for (Map.Entry<String, AtomicLongMap<String>> perField : aggregation.entrySet()) {
        for (Map.Entry<String, Long> counted : perField.getValue().asMap().entrySet()) {
            Document row = new Document();
            row.setField("value", counted.getKey());
            row.setField("count", counted.getValue());
            row.setField("type", perField.getKey());
            super.document(row);
        }
    }
    super.end();
}
Also used : AtomicLongMap(com.google.common.util.concurrent.AtomicLongMap) Document(de.tblsoft.solr.pipeline.bean.Document) Map(java.util.Map) HashMap(java.util.HashMap)

Example 40 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class BlacklistFieldFilter method document.

/**
 * Builds a copy of the incoming document with blacklisted values removed and
 * forwards the copy to the parent implementation.
 *
 * <p>A field is filtered when {@code fields} is {@code null} or contains the
 * field's name; all other fields are copied with their values unchanged. For a
 * filtered field, only values not contained in {@code topicValues.get(topic)} are
 * kept. A filtered field is still set on the copy even when no value remains.
 *
 * @param document the document whose fields are inspected; it is not modified here
 */
@Override
public void document(Document document) {
    Document filtered = new Document();
    for (Field current : document.getFields()) {
        boolean copyUnchanged = fields != null && !fields.contains(current.getName());
        if (copyUnchanged) {
            filtered.setField(current.getName(), current.getValues());
            continue;
        }

        List<String> kept = new ArrayList<String>();
        for (String candidate : current.getValues()) {
            if (topicValues.get(topic).contains(candidate)) {
                // Blacklisted for the configured topic: drop it.
                continue;
            }
            kept.add(candidate);
        }
        filtered.setField(current.getName(), kept);
    }
    super.document(filtered);
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) ArrayList(java.util.ArrayList) Document(de.tblsoft.solr.pipeline.bean.Document)

Aggregations

Document (de.tblsoft.solr.pipeline.bean.Document) 51, ArrayList (java.util.ArrayList) 9, Map (java.util.Map) 9, Test (org.junit.Test) 9, Field (de.tblsoft.solr.pipeline.bean.Field) 8, DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff) 4, AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest) 4, GsonBuilder (com.google.gson.GsonBuilder) 3, Match (oi.thekraken.grok.api.Match) 3, AtomicLongMap (com.google.common.util.concurrent.AtomicLongMap) 2, Gson (com.google.gson.Gson) 2, JsonElement (com.google.gson.JsonElement) 2, DocumentContext (com.jayway.jsonpath.DocumentContext) 2, PathNotFoundException (com.jayway.jsonpath.PathNotFoundException) 2, DocumentBuilder (de.tblsoft.solr.pipeline.bean.DocumentBuilder) 2, SimpleMapping (de.tblsoft.solr.pipeline.filter.SimpleMapping) 2, File (java.io.File) 2, IOException (java.io.IOException) 2, InputStream (java.io.InputStream) 2, InputStreamReader (java.io.InputStreamReader) 2