Examples with Document - de.tblsoft.solr.pipeline.bean.Document

Example 26 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class JavaScriptFilter method document.

/**
 * Evaluates the configured script for one input document and forwards every
 * document the script placed in the {@code output} list to the parent filter.
 *
 * <p>The script sees three variables in its scope:
 * <ul>
 *   <li>{@code documentBuilder} - a fresh {@link DocumentBuilder}</li>
 *   <li>{@code input} - the incoming document</li>
 *   <li>{@code output} - an initially empty list; only documents added to it are forwarded</li>
 * </ul>
 *
 * @param document the incoming document exposed to the script as {@code input}
 */
@Override
public void document(Document document) {
    List<Document> output = new ArrayList<Document>();
    Context cx = Context.enter();
    try {
        Scriptable scope = cx.initStandardObjects();
        ScriptableObject.putProperty(scope, "documentBuilder", Context.javaToJS(new DocumentBuilder(), scope));
        ScriptableObject.putProperty(scope, "input", Context.javaToJS(document, scope));
        ScriptableObject.putProperty(scope, "output", Context.javaToJS(output, scope));
        cx.evaluateString(scope, script, filename, 1, null);
        // Forward while the context is still entered, as the original code did.
        for (Document out : output) {
            super.document(out);
        }
    } finally {
        // Every Context.enter() must be balanced by Context.exit(); otherwise the
        // context stays associated with the current thread, also after script errors.
        Context.exit();
    }
}

Also used : Context(org.mozilla.javascript.Context) DocumentBuilder(de.tblsoft.solr.pipeline.bean.DocumentBuilder) ArrayList(java.util.ArrayList) Scriptable(org.mozilla.javascript.Scriptable) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 27 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class OpenThesaurusReader method line2document.

/**
 * Wraps the parts of one line in a new document, stored as a list under the
 * field {@code tokens}, and passes that document to the executer.
 *
 * @param lineParts the parts of a single line
 */
void line2document(String[] lineParts) {
    final Document tokenDocument = new Document();
    tokenDocument.setField("tokens", Arrays.asList(lineParts));
    executer.document(tokenDocument);
}

Also used : Document(de.tblsoft.solr.pipeline.bean.Document)

Example 28 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class ThreadDumpReader method endDocument.

/**
 * Completes the document currently being assembled: records the run and file
 * metadata plus the running position via {@code field(...)}, passes the
 * document to the executer and then starts a new, empty document.
 */
void endDocument() {
    // NOTE(review): field(...) presumably writes to currentDocument - confirm.
    field("runId", runId);
    field("description", currentDescription);
    field("date", currentDate);
    field("fileName", currentFileName);
    field("directory", currentDirectory);
    // The position is recorded first and advanced afterwards.
    field("position", String.valueOf(position++));

    executer.document(currentDocument);
    currentDocument = new Document();
}

Also used : Document(de.tblsoft.solr.pipeline.bean.Document)

Example 29 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class ElasticJsonPathReader method read.

/**
 * Pages through an Elasticsearch scroll request and emits one document per hit.
 *
 * <p>Properties read:
 * <ul>
 *   <li>{@code mapping} - list handed to {@link SimpleMapping}; its result maps a
 *       JsonPath expression to the target field names that receive the value</li>
 *   <li>{@code url} - initial search URL (required)</li>
 *   <li>{@code scroll} - scroll keep-alive, default {@code 1m}</li>
 * </ul>
 *
 * <p>Each page's {@code _source} objects are mapped to documents and passed to the
 * executer. Paging stops after the first page without hits.
 * {@code executer.end()} is not called by this method.
 *
 * @throws RuntimeException wrapping any failure while fetching or parsing a page
 */
public void read() {
    SimpleMapping simpleMapping = new SimpleMapping(getPropertyAsList("mapping", new ArrayList<String>()));
    Map<String, List<String>> mapping = simpleMapping.getMapping();
    try {
        url = getProperty("url", null);
        if (url == null) {
            // Fail with a clear message instead of requesting the literal URL "null&scroll=...".
            throw new IllegalArgumentException("The property 'url' is required.");
        }
        scroll = getProperty("scroll", "1m");
        String scrollBaseUrl = ElasticHelper.getScrollUrl(url);
        // NOTE(review): appending with '&' assumes url already has a query string - confirm.
        String pagedUrl = url + "&scroll=" + scroll;
        boolean hasHits;
        do {
            String response = HTTPHelper.get(pagedUrl);
            DocumentContext context = JsonPath.parse(response);
            String scrollId = context.read("$['_scroll_id']");
            List<Object> elasticHits = context.read("$['hits']['hits'][*]['_source']");
            hasHits = !elasticHits.isEmpty();
            for (Object hit : elasticHits) {
                // Parse the hit once and reuse it for every mapping entry.
                DocumentContext hitContext = JsonPath.parse(hit);
                Document document = new Document();
                for (Map.Entry<String, List<String>> mappingEntry : mapping.entrySet()) {
                    try {
                        Object parsedValue = hitContext.read(mappingEntry.getKey());
                        for (String target : mappingEntry.getValue()) {
                            document.setField(target, parsedValue);
                        }
                    } catch (PathNotFoundException ignored) {
                        // The hit does not contain this path; leave the target fields unset.
                    }
                }
                executer.document(document);
            }
            // Subsequent pages are requested through the scroll endpoint with the latest scroll id.
            pagedUrl = scrollBaseUrl + "?scroll=" + scroll + "&scroll_id=" + scrollId;
        } while (hasHits);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

Also used : ArrayList(java.util.ArrayList) Document(de.tblsoft.solr.pipeline.bean.Document) PathNotFoundException(com.jayway.jsonpath.PathNotFoundException) SimpleMapping(de.tblsoft.solr.pipeline.filter.SimpleMapping) List(java.util.List) DocumentContext(com.jayway.jsonpath.DocumentContext) Map(java.util.Map)

Example 30 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class FilelineReader method read.

/**
 * Reads a text file line by line and emits one document per data line.
 *
 * <p>Properties read:
 * <ul>
 *   <li>{@code charset} - file encoding, default UTF-8</li>
 *   <li>{@code filename} - file to read, resolved against the base directory</li>
 *   <li>{@code maxRows} - maximum number of lines to read; comment and empty
 *       lines count towards this limit</li>
 *   <li>{@code delimiter} - separator used to split a line, default {@code ,}</li>
 *   <li>{@code commentPrefix} - lines starting with this prefix are skipped, default {@code #}</li>
 *   <li>{@code fieldName} - field that receives every trimmed item, default {@code line}</li>
 * </ul>
 *
 * @throws RuntimeException wrapping any failure while opening or reading the file
 */
public void read() {
    String charset = getProperty("charset", StandardCharsets.UTF_8.name());
    String filename = getProperty("filename", null);
    String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
    Long maxRows = getPropertyAsInteger("maxRows", Long.MAX_VALUE);
    String delimiter = getProperty("delimiter", ",");
    String commentPrefix = getProperty("commentPrefix", "#");
    String fieldName = getProperty("fieldName", "line");
    // try-with-resources closes the reader even when reading or downstream processing fails.
    try (BufferedReader br = Files.newReader(new File(absoluteFilename), Charset.forName(charset))) {
        String line;
        long countLines = 0;
        while ((line = br.readLine()) != null) {
            countLines++;
            if (countLines > maxRows) {
                break;
            }
            if (line.startsWith(commentPrefix)) {
                continue;
            }
            if (Strings.isNullOrEmpty(line)) {
                continue;
            }
            Document document = new Document();
            for (String item : Splitter.on(delimiter).trimResults().split(line)) {
                document.addField(fieldName, item);
            }
            executer.document(document);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

Also used : BufferedReader(java.io.BufferedReader) Document(de.tblsoft.solr.pipeline.bean.Document) File(java.io.File)

Aggregations

Document (de.tblsoft.solr.pipeline.bean.Document)51 ArrayList (java.util.ArrayList)9 Map (java.util.Map)9 Test (org.junit.Test)9 Field (de.tblsoft.solr.pipeline.bean.Field)8 DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff)4 AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest)4 GsonBuilder (com.google.gson.GsonBuilder)3 Match (oi.thekraken.grok.api.Match)3 AtomicLongMap (com.google.common.util.concurrent.AtomicLongMap)2 Gson (com.google.gson.Gson)2 JsonElement (com.google.gson.JsonElement)2 DocumentContext (com.jayway.jsonpath.DocumentContext)2 PathNotFoundException (com.jayway.jsonpath.PathNotFoundException)2 DocumentBuilder (de.tblsoft.solr.pipeline.bean.DocumentBuilder)2 SimpleMapping (de.tblsoft.solr.pipeline.filter.SimpleMapping)2 File (java.io.File)2 IOException (java.io.IOException)2 InputStream (java.io.InputStream)2 InputStreamReader (java.io.InputStreamReader)2