Search in sources :

Example 31 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class JsonPathReader method read.

/**
 * Reads the JSON context, selects the elements addressed by the {@code rootPath} property
 * (default {@code "$"}) and hands one {@link Document} per selected element to the executer.
 * <p>
 * The {@code mapping} property supplies, per source JsonPath expression, the list of target
 * field names. A source path that does not exist in an element is skipped for that element.
 *
 * @throws RuntimeException wrapping any other exception raised while loading or reading
 */
public void read() {
    List<String> mappingConfig = getPropertyAsList("mapping", new ArrayList<String>());
    Map<String, List<String>> pathToTargets = new SimpleMapping(mappingConfig).getMapping();
    try {
        String rootPath = getProperty("rootPath", "$");
        List<Object> elements = loadJsonContext().read(rootPath);
        for (Object element : elements) {
            Document result = new Document();
            for (Map.Entry<String, List<String>> rule : pathToTargets.entrySet()) {
                String sourcePath = rule.getKey();
                try {
                    // The element is re-parsed for every rule, exactly as many times as there are rules.
                    Object extracted = JsonPath.parse(element).read(sourcePath);
                    for (String targetField : rule.getValue()) {
                        result.setField(targetField, extracted);
                    }
                } catch (PathNotFoundException ignored) {
                    // The source path is absent in this element: leave its target fields unset.
                }
            }
            executer.document(result);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : ArrayList(java.util.ArrayList) Document(de.tblsoft.solr.pipeline.bean.Document) IOException(java.io.IOException) PathNotFoundException(com.jayway.jsonpath.PathNotFoundException) SimpleMapping(de.tblsoft.solr.pipeline.filter.SimpleMapping) List(java.util.List) DocumentContext(com.jayway.jsonpath.DocumentContext) Map(java.util.Map)

Example 32 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class JsonReader method read.

/**
 * Streams the JSON file named by the {@code filename} property and evaluates the script
 * named by the {@code javaScriptFilename} property once for every value matched by the
 * {@code rootPath} property (default {@code "$"}).
 * <p>
 * For each match the script runs in a fresh scope that exposes the matched value as
 * {@code input}, a {@link DocumentBuilder} as {@code documentBuilder} and a list as
 * {@code output}. Every {@link Document} found in {@code output} afterwards is passed to
 * the executer.
 *
 * @throws RuntimeException wrapping any exception raised while reading or evaluating
 */
public void read() {
    try {
        ScriptEngineManager mgr = new ScriptEngineManager();
        engine = mgr.getEngineByName("JavaScript");
        String internalFilename = getProperty("filename", null);
        filename = IOUtils.getAbsoluteFile(getBaseDir(), internalFilename);
        String internalJavaScriptFilename = getProperty("javaScriptFilename", null);
        javaScriptFilename = IOUtils.getAbsoluteFile(getBaseDir(), internalJavaScriptFilename);
        rootPath = getProperty("rootPath", "$");
        script = FileUtils.readFileToString(new File(javaScriptFilename));
        // NOTE(review): Context.enter() is not paired with Context.exit() in this method;
        // confirm whether another lifecycle method releases the Rhino context.
        cx = Context.enter();
        JsonSurfer surfer = JsonSurferGson.INSTANCE;
        java.io.Reader sample = new FileReader(filename);
        try {
            surfer.configBuilder().bind(rootPath, new JsonPathListener() {

                public void onValue(Object value, ParsingContext context) {
                    Scriptable scope = cx.initStandardObjects();
                    List<Document> output = new ArrayList<Document>();
                    ScriptableObject.putProperty(scope, "documentBuilder", Context.javaToJS(new DocumentBuilder(), scope));
                    ScriptableObject.putProperty(scope, "output", Context.javaToJS(output, scope));
                    // NOTE(review): the matched value is spliced into the script source as text,
                    // so the content of the input file is evaluated as JavaScript. Only use
                    // this reader with trusted input files.
                    String exec = "var input = " + value.toString() + ";" + script;
                    cx.evaluateString(scope, exec, filename, 1, null);
                    for (Document out : output) {
                        executer.document(out);
                    }
                }
            }).buildAndSurf(sample);
        } finally {
            // Close the file even when parsing or the script callback throws.
            sample.close();
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : ParsingContext(org.jsfr.json.ParsingContext) ScriptEngineManager(javax.script.ScriptEngineManager) JsonPathListener(org.jsfr.json.JsonPathListener) ArrayList(java.util.ArrayList) Scriptable(org.mozilla.javascript.Scriptable) Document(de.tblsoft.solr.pipeline.bean.Document) DocumentBuilder(de.tblsoft.solr.pipeline.bean.DocumentBuilder) FileReader(java.io.FileReader) ScriptableObject(org.mozilla.javascript.ScriptableObject) JsonSurfer(org.jsfr.json.JsonSurfer) File(java.io.File)

Example 33 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class GrokFilter method processLine.

/**
 * Matches one input line against the grok pattern and copies every capture into a new
 * {@link Document}, converting each value with {@link String#valueOf(Object)}.
 * <p>
 * NOTE(review): the document is built but not passed on; the code that would emit it is
 * commented out at the end of the method. Confirm whether that is intentional.
 *
 * @param line the raw line to match
 */
void processLine(String line) {
    Match match = grok.match(line);
    match.captures();
    Map<String, Object> captures = match.toMap();
    Document document = new Document();
    for (Map.Entry<String, Object> capture : captures.entrySet()) {
        document.addField(capture.getKey(), String.valueOf(capture.getValue()));
    }
/*
        Disabled emission step, kept for reference:

        if (!captures.isEmpty()) {
            document.addField("filename", currentFileName);
            if (keepRaw) {
                document.addField("raw", line);
            }
        }
        executer.document(document);
        */
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document) Map(java.util.Map) Match(oi.thekraken.grok.api.Match)

Example 34 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class HttpFilter method processQueue.

/**
 * Submits one {@link HttpWorker} per queued document to the executor, then waits for the
 * results in submission order and forwards each resulting document via
 * {@code super.document}. The queue is cleared once all results have been forwarded.
 *
 * @throws RuntimeException if the waiting thread is interrupted (the interrupt flag is
 *         restored first) or if a worker fails; the original exception is the cause
 */
void processQueue() {
    List<Future<Document>> documentFutures = new ArrayList<Future<Document>>();
    for (Document documentFromQueue : documentQueue) {
        HttpWorker worker = new HttpWorker(documentFromQueue, httpclient, urlField, userAgent);
        Future<Document> future = executor.submit(worker);
        documentFutures.add(future);
    }
    for (Future<Document> documentFuture : documentFutures) {
        try {
            Document d = documentFuture.get();
            super.document(d);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the interrupt status; restore it so
            // callers further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException(e);
        }
    }
    documentQueue.clear();
}
Also used : ArrayList(java.util.ArrayList) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 35 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class JsonWriter method mapToJsonString.

/**
 * Converts each document with {@code mapToJson} and serializes the resulting list with a
 * pretty-printing {@link Gson} instance.
 *
 * @param documentList the documents to serialize, in output order
 * @return the pretty-printed JSON text for the whole list
 */
public static String mapToJsonString(List<Document> documentList) {
    Gson prettyGson = new GsonBuilder().setPrettyPrinting().create();
    List<Map<String, Object>> jsonObjects = new ArrayList<Map<String, Object>>();
    for (Document current : documentList) {
        Map<String, Object> asMap = mapToJson(current);
        jsonObjects.add(asMap);
    }
    return prettyGson.toJson(jsonObjects);
}
Also used : GsonBuilder(com.google.gson.GsonBuilder) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) Document(de.tblsoft.solr.pipeline.bean.Document) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

- Document (de.tblsoft.solr.pipeline.bean.Document): 51
- ArrayList (java.util.ArrayList): 9
- Map (java.util.Map): 9
- Test (org.junit.Test): 9
- Field (de.tblsoft.solr.pipeline.bean.Field): 8
- DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff): 4
- AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest): 4
- GsonBuilder (com.google.gson.GsonBuilder): 3
- Match (oi.thekraken.grok.api.Match): 3
- AtomicLongMap (com.google.common.util.concurrent.AtomicLongMap): 2
- Gson (com.google.gson.Gson): 2
- JsonElement (com.google.gson.JsonElement): 2
- DocumentContext (com.jayway.jsonpath.DocumentContext): 2
- PathNotFoundException (com.jayway.jsonpath.PathNotFoundException): 2
- DocumentBuilder (de.tblsoft.solr.pipeline.bean.DocumentBuilder): 2
- SimpleMapping (de.tblsoft.solr.pipeline.filter.SimpleMapping): 2
- File (java.io.File): 2
- IOException (java.io.IOException): 2
- InputStream (java.io.InputStream): 2
- InputStreamReader (java.io.InputStreamReader): 2