Search in sources:

Example 6 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class SolrFieldCounterTest method testSolrFieldCounterFilter.

/**
 * Sends two documents through the configured filter and checks the reported per-field counts:
 * "field1" is present in both input documents, "field2" in only one of them.
 */
@Test
public void testSolrFieldCounterFilter() {
    configure();

    Document onlyFirstField = DocumentBuilder.document()
            .field("field1", "value1")
            .create();
    Document bothFields = DocumentBuilder.document()
            .field("field1", "value1")
            .field("field2", "value2")
            .create();
    document(onlyFirstField, bothFields);

    // Per-field occurrence counts across the two input documents.
    assertFiled("field2", "1");
    assertFiled("field1", "2");

    // The helper assertions expect a single output document carrying two fields.
    assertNumberOfDocuments(1);
    assertNumberOfFields(2);

    // Lifecycle calls must have been forwarded as well.
    assertInitWasDelegated();
    assertEndWasDelegated();
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document) AbstractFilterTest(de.tblsoft.solr.pipeline.test.AbstractFilterTest) Test(org.junit.Test)

Example 7 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class CSVReader method read.

/**
 * Reads the configured CSV file and passes one {@link Document} per record to the executer.
 *
 * <p>Properties read by this method:
 * <ul>
 *   <li>{@code charset} - charset name used to decode the file (default UTF-8)</li>
 *   <li>{@code filename} - file to read, resolved against {@code getBaseDir()}</li>
 *   <li>{@code addMeta} - when true, adds {@code rowNumber} (1-based) and {@code fileName}
 *       fields to every document (default false)</li>
 *   <li>{@code maxRows} - maximum number of records to emit (default {@code Long.MAX_VALUE})</li>
 *   <li>{@code delimiter} - column delimiter; only its first character is used (default ",")</li>
 *   <li>{@code arrayDelimiter} - when set, each cell is split on it and the non-empty parts are
 *       stored as a multi-valued field; it is handed to {@link String#split(String)} and is
 *       therefore interpreted as a regular expression</li>
 *   <li>{@code headers} - explicit column names; when absent the first record of the file is
 *       used as the header</li>
 * </ul>
 *
 * @throws RuntimeException wrapping any exception raised while configuring, opening or parsing
 *         the file
 */
@Override
public void read() {
    try {
        String charset = getProperty("charset", StandardCharsets.UTF_8.name());
        String filename = getProperty("filename", null);
        String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
        boolean addMeta = getPropertyAsBoolean("addMeta", false);
        long maxRows = getPropertyAsInteger("maxRows", Long.MAX_VALUE);
        String delimiter = getProperty("delimiter", ",");
        String arrayDelimiter = getProperty("arrayDelimiter", null);
        String[] headers = getPropertyAsArray("headers", null);

        // Fail with a readable message instead of an index/null error from charAt(0).
        if (StringUtils.isEmpty(delimiter)) {
            throw new IllegalArgumentException("The 'delimiter' property must not be empty");
        }
        boolean splitIntoArray = !StringUtils.isEmpty(arrayDelimiter);

        CSVFormat format = CSVFormat.RFC4180;
        if (headers == null) {
            format = format.withHeader();
        } else {
            format = format.withHeader(headers);
        }
        format = format.withDelimiter(delimiter.charAt(0));

        // try-with-resources closes parser, reader and stream on every exit path,
        // including when the executer or the parser throws half-way through the file.
        try (InputStream in = IOUtils.getInputStream(absoluteFilename);
                java.io.Reader reader = new InputStreamReader(in, charset);
                CSVParser parser = format.parse(reader)) {
            // The header is fixed once the parser is created, so fetch it a single time.
            Map<String, Integer> header = parser.getHeaderMap();
            Iterator<CSVRecord> csvIterator = parser.iterator();
            long rowNumber = 0;
            // Check the limit first so no record beyond maxRows is parsed.
            while (rowNumber < maxRows && csvIterator.hasNext()) {
                CSVRecord record = csvIterator.next();
                rowNumber++;
                Document document = new Document();
                for (String key : header.keySet()) {
                    try {
                        String value = record.get(key);
                        if (splitIntoArray) {
                            List<String> valueList = new ArrayList<String>();
                            for (String val : value.split(arrayDelimiter)) {
                                if (StringUtils.isNotEmpty(val)) {
                                    valueList.add(val);
                                }
                            }
                            document.setField(key, valueList);
                        } else {
                            document.addField(key, value);
                        }
                    } catch (IllegalArgumentException ignored) {
                        // Best effort: CSVRecord.get rejects a header column that this
                        // record does not contain (short row); that column is skipped
                        // and the remaining columns of the row are still emitted.
                    }
                }
                if (addMeta) {
                    document.addField("rowNumber", String.valueOf(rowNumber));
                    document.addField("fileName", absoluteFilename);
                }
                executer.document(document);
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Document(de.tblsoft.solr.pipeline.bean.Document) CSVParser(org.apache.commons.csv.CSVParser) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord) Map(java.util.Map)

Example 8 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class ElasticReader method read.

/**
 * Pages through an Elasticsearch scroll request and forwards every hit to the executer.
 *
 * <p>The first request goes to the {@code url} property with {@code &scroll=<scroll>} appended;
 * follow-up requests go to the scroll URL derived by {@code ElasticHelper.getScrollUrl} using the
 * {@code _scroll_id} of the previous response. Paging stops after the first response whose
 * {@code hits.hits} array is empty.
 *
 * <p>For each hit, only the primitive entries of {@code _source} are copied into the document;
 * array values (and any other non-primitive values) are skipped.
 *
 * <p>NOTE(review): because {@code "&scroll="} is appended, the {@code url} property presumably
 * has to contain a query string already - confirm against the callers' configuration.
 *
 * @throws RuntimeException wrapping any exception raised while fetching or parsing a page
 */
@Override
public void read() {
    gson = new GsonBuilder().create();
    try {
        url = getProperty("url", null);
        scroll = getProperty("scroll", "1m");
        String scrollBaseUrl = ElasticHelper.getScrollUrl(url);
        String nextUrl = url + "&scroll=" + scroll;

        boolean sawHit = true;
        while (sawHit) {
            String body = HTTPHelper.get(nextUrl);
            JsonElement root = gson.fromJson(body, JsonElement.class);
            String scrollId = root.getAsJsonObject().get("_scroll_id").getAsString();
            JsonElement hits = root.getAsJsonObject().get("hits").getAsJsonObject().get("hits");

            sawHit = false;
            for (JsonElement hit : hits.getAsJsonArray()) {
                sawHit = true;
                Document document = new Document();
                JsonElement source = hit.getAsJsonObject().get("_source");
                for (Entry<String, JsonElement> field : source.getAsJsonObject().entrySet()) {
                    JsonElement fieldValue = field.getValue();
                    // Arrays are deliberately not copied; only primitive values become fields.
                    if (!fieldValue.isJsonArray() && fieldValue.isJsonPrimitive()) {
                        document.addField(field.getKey(), fieldValue.getAsString());
                    }
                }
                executer.document(document);
            }

            nextUrl = scrollBaseUrl + "?scroll=" + scroll + "&scroll_id=" + scrollId;
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : GsonBuilder(com.google.gson.GsonBuilder) JsonElement(com.google.gson.JsonElement) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 9 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class GCLogReader method processLine.

/**
 * Echoes the line to standard output, matches it against the grok pattern and, if the match
 * produced any captures, forwards them to the executer as one document.
 *
 * <p>Every capture is stored as a string field. The document additionally gets a
 * {@code filename} field holding {@code currentFileName} and, when {@code keepRaw} is set,
 * a {@code raw} field holding the unmodified line. Lines without captures produce no document.
 *
 * @param line the log line to process
 */
void processLine(String line) {
    System.out.println(line);

    Match match = grok.match(line);
    match.captures();
    Map<String, Object> captured = match.toMap();
    if (captured.isEmpty()) {
        // Nothing matched: emit no document for this line.
        return;
    }

    Document document = new Document();
    for (Map.Entry<String, Object> capture : captured.entrySet()) {
        document.addField(capture.getKey(), String.valueOf(capture.getValue()));
    }
    document.addField("filename", currentFileName);
    if (keepRaw) {
        document.addField("raw", line);
    }
    executer.document(document);
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document) Map(java.util.Map) Match(oi.thekraken.grok.api.Match)

Example 10 with Document

use of de.tblsoft.solr.pipeline.bean.Document in project solr-cmd-utils by tblsoft.

the class GrokReader method processLine.

/**
 * Matches one line against the grok pattern and, if the match produced any captures,
 * forwards them to the executer as one document.
 *
 * <p>Every capture is stored as a string field. The document additionally gets a
 * {@code filename} field holding {@code currentFileName} and, when {@code keepRaw} is set,
 * a {@code raw} field holding the unmodified line. Lines without captures produce no document.
 *
 * @param line the line to process
 */
void processLine(String line) {
    Match grokMatch = grok.match(line);
    grokMatch.captures();
    Map<String, Object> captures = grokMatch.toMap();

    boolean hasCaptures = !captures.isEmpty();
    if (hasCaptures) {
        Document document = new Document();
        for (Map.Entry<String, Object> capture : captures.entrySet()) {
            String fieldName = capture.getKey();
            Object fieldValue = capture.getValue();
            document.addField(fieldName, String.valueOf(fieldValue));
        }
        document.addField("filename", currentFileName);
        if (keepRaw) {
            document.addField("raw", line);
        }
        executer.document(document);
    }
}
Also used : Document(de.tblsoft.solr.pipeline.bean.Document) Map(java.util.Map) Match(oi.thekraken.grok.api.Match)

Aggregations

Document (de.tblsoft.solr.pipeline.bean.Document)51 ArrayList (java.util.ArrayList)9 Map (java.util.Map)9 Test (org.junit.Test)9 Field (de.tblsoft.solr.pipeline.bean.Field)8 DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff)4 AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest)4 GsonBuilder (com.google.gson.GsonBuilder)3 Match (oi.thekraken.grok.api.Match)3 AtomicLongMap (com.google.common.util.concurrent.AtomicLongMap)2 Gson (com.google.gson.Gson)2 JsonElement (com.google.gson.JsonElement)2 DocumentContext (com.jayway.jsonpath.DocumentContext)2 PathNotFoundException (com.jayway.jsonpath.PathNotFoundException)2 DocumentBuilder (de.tblsoft.solr.pipeline.bean.DocumentBuilder)2 SimpleMapping (de.tblsoft.solr.pipeline.filter.SimpleMapping)2 File (java.io.File)2 IOException (java.io.IOException)2 InputStream (java.io.InputStream)2 InputStreamReader (java.io.InputStreamReader)2