Search in sources :

Example 6 with Field

use of de.tblsoft.solr.pipeline.bean.Field in project solr-cmd-utils by tblsoft.

the class TopicAggregationFilter method mergeDocuments.

/**
 * Builds a new document containing, for every field name that occurs in
 * either input, one field holding the distinct values found under that name.
 *
 * <p>Both inputs are treated the same way: if a name occurs several times
 * (within one document or across both), all of its values are unioned.
 * A field whose value list is {@code null} contributes its name but no values.
 * Fields and values appear in the order in which they were first seen,
 * reading {@code doc1} before {@code doc2}.
 *
 * @param doc1 first document to read fields from
 * @param doc2 second document to read fields from
 * @return a newly created document with the merged fields
 */
protected Document mergeDocuments(Document doc1, Document doc2) {
    // Linked collections keep first-seen order so the merged output is
    // reproducible; LinkedHashSet is a HashSet, so the map type is unchanged.
    Map<String, HashSet<String>> mergedFields = new java.util.LinkedHashMap<String, HashSet<String>>();
    for (Document source : new Document[] { doc1, doc2 }) {
        for (Field field : source.getFields()) {
            HashSet<String> values = mergedFields.get(field.getName());
            if (values == null) {
                values = new java.util.LinkedHashSet<String>();
                mergedFields.put(field.getName(), values);
            }
            // A field may carry no value list at all; keep the name, add nothing.
            if (field.getValues() != null) {
                values.addAll(field.getValues());
            }
        }
    }
    Document mergedDoc = new Document();
    for (Map.Entry<String, HashSet<String>> entry : mergedFields.entrySet()) {
        mergedDoc.addField(new Field(entry.getKey(), new ArrayList<String>(entry.getValue())));
    }
    return mergedDoc;
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 7 with Field

use of de.tblsoft.solr.pipeline.bean.Field in project solr-cmd-utils by tblsoft.

the class WhitelistTopicTermsFilter method document.

/**
 * Forwards the document to the parent filter, first overwriting its fields
 * from a stored override document when overriding is enabled and an entry
 * exists in {@code topicValues} for the document's topic and value.
 *
 * <p>When an override was applied, the topic/value pair is flagged
 * {@code true} in {@code topicsOverriden} after the document has been
 * forwarded.
 *
 * @param document the document to process
 */
@Override
public void document(Document document) {
    Field topic = document.getField(fieldTopic);
    Field value = document.getField(fieldValue);

    // Short-circuit evaluation keeps the lookups null-safe and in the same
    // order as a nested chain of checks would perform them.
    boolean overrideAvailable = override
            && topic != null
            && value != null
            && topicValues.containsKey(topic.getValue())
            && topicValues.get(topic.getValue()).containsKey(value.getValue());

    if (!overrideAvailable) {
        super.document(document);
        return;
    }

    Document replacementSource = topicValues.get(topic.getValue()).get(value.getValue());
    for (Field replacement : replacementSource.getFields()) {
        document.setField(replacement.getName(), replacement.getValues());
    }
    super.document(document);
    // Record the pair only once the document has been passed on.
    topicsOverriden.get(topic.getValue()).put(value.getValue(), true);
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) Document(de.tblsoft.solr.pipeline.bean.Document)

Example 8 with Field

use of de.tblsoft.solr.pipeline.bean.Field in project solr-cmd-utils by tblsoft.

the class CSVWriter method document.

/**
 * Writes one CSV record for the given document and then forwards the
 * document to the parent filter.
 *
 * <p>On the first document the output is set up: if no headers are
 * configured they are taken from this document via {@code getFieldNames},
 * the output stream for {@code absoluteFilename} is opened and the
 * {@code CSVPrinter} is created (with a header row when {@code withHeaders}
 * is set). If that setup fails, the stream that was just opened is closed
 * again before the failure is reported.
 *
 * <p>Each record has one column per header; the values of a multi-valued
 * field are joined with {@code multiValueSeperator}, and a missing field
 * yields a {@code null} column.
 *
 * @param document the document to write
 * @throws RuntimeException wrapping any failure during setup, or an
 *         {@link IOException} raised while printing the record
 */
@Override
public void document(Document document) {
    if (firstDocument) {
        OutputStream out = null;
        boolean printerReady = false;
        try {
            if (headers == null) {
                headers = getFieldNames(document);
            }
            // Build the format before touching the file so that a bad
            // delimiter fails without leaving an open stream behind.
            CSVFormat format = CSVFormat.RFC4180.withDelimiter(delimiter.charAt(0));
            if (withHeaders) {
                format = format.withHeader(headers);
            }
            out = IOUtils.getOutputStream(absoluteFilename);
            // NOTE(review): OutputStreamWriter without a charset uses the
            // platform default encoding - confirm whether that is intended.
            printer = new CSVPrinter(new BufferedWriter(new OutputStreamWriter(out)), format);
            firstDocument = false;
            printerReady = true;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            if (!printerReady && out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // The setup failure thrown above is the error worth reporting.
                }
            }
        }
    }
    try {
        List<String> csvList = new ArrayList<String>();
        for (String headerField : headers) {
            Field field = document.getField(headerField);
            String value = null;
            if (field != null && field.getValues() != null) {
                value = Joiner.on(multiValueSeperator).skipNulls().join(field.getValues());
            }
            csvList.add(value);
        }
        printer.printRecord(csvList);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    super.document(document);
}
Also used : OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) CSVPrinter(org.apache.commons.csv.CSVPrinter) Field(de.tblsoft.solr.pipeline.bean.Field) CSVFormat(org.apache.commons.csv.CSVFormat) OutputStreamWriter(java.io.OutputStreamWriter) List(java.util.List) PrintWriter(java.io.PrintWriter) Writer(java.io.Writer)

Example 9 with Field

use of de.tblsoft.solr.pipeline.bean.Field in project solr-cmd-utils by tblsoft.

the class SolrFeeder method document.

/**
 * Converts the document into a {@code SolrInputDocument}, adds it to the
 * Solr server and then forwards the original document to the parent filter.
 *
 * <p>Fields for which {@code isFieldIgnored} returns {@code true} are left
 * out of the Solr document.
 *
 * @param document the document to send
 * @throws RuntimeException wrapping a {@link SolrServerException} or
 *         {@link IOException} raised while adding the document
 */
@Override
public void document(Document document) {
    SolrInputDocument solrDoc = new SolrInputDocument();
    for (Field candidate : document.getFields()) {
        if (isFieldIgnored(candidate.getName())) {
            continue;
        }
        solrDoc.addField(candidate.getName(), candidate.getValues());
    }

    try {
        server.add(solrDoc);
    } catch (SolrServerException solrFailure) {
        throw new RuntimeException(solrFailure);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }

    super.document(document);
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException)

Example 10 with Field

use of de.tblsoft.solr.pipeline.bean.Field in project solr-cmd-utils by tblsoft.

the class DocumentDiffer method compare.

/**
 * Compares two documents field by field and collects the differences.
 *
 * <p>For every field of {@code d1} the values are compared with the values
 * stored under the same name in {@code d2}; every result other than
 * {@code EQUAL} is recorded with the old and new values. Whatever fields
 * remain in {@code d2} afterwards are recorded as {@code CREATE}.
 *
 * <p>Note: this method modifies {@code d2} - each field name found in
 * {@code d1} is deleted from it during the comparison.
 *
 * @param d1 the document providing the old values
 * @param d2 the document providing the new values; fields are deleted from it
 * @return the collected field differences
 */
public static DocumentDiff compare(Document d1, Document d2) {
    DocumentDiff result = new DocumentDiff();

    for (Field oldField : d1.getFields()) {
        List<String> oldValues = oldField.getValues();
        // Read the counterpart first, then drop it so that only fields
        // unknown to d1 are left for the second pass.
        List<String> newValues = d2.getFieldValues(oldField.getName());
        d2.deleteField(oldField.getName());

        FieldDiff.DiffType kind = getChangeType(oldValues, newValues);
        FieldDiff entry = new FieldDiff();
        entry.setDiffType(kind);
        if (kind.equals(FieldDiff.DiffType.EQUAL)) {
            continue;
        }
        entry.setOldValues(oldValues);
        entry.setNewValue(newValues);
        result.getFieldDiffs().add(entry);
    }

    for (Field addedField : d2.getFields()) {
        FieldDiff entry = new FieldDiff();
        entry.setDiffType(FieldDiff.DiffType.CREATE);
        entry.setNewValue(addedField.getValues());
        result.getFieldDiffs().add(entry);
    }

    return result;
}
Also used : Field(de.tblsoft.solr.pipeline.bean.Field) DocumentDiff(de.tblsoft.solr.pipeline.bean.DocumentDiff) FieldDiff(de.tblsoft.solr.pipeline.bean.FieldDiff)

Aggregations

Field (de.tblsoft.solr.pipeline.bean.Field)27 Document (de.tblsoft.solr.pipeline.bean.Document)8 Test (org.junit.Test)5 AbstractFilterTest (de.tblsoft.solr.pipeline.test.AbstractFilterTest)4 Ignore (org.junit.Ignore)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 StrSubstitutor (org.apache.commons.lang3.text.StrSubstitutor)2 DocumentDiff (de.tblsoft.solr.pipeline.bean.DocumentDiff)1 FieldDiff (de.tblsoft.solr.pipeline.bean.FieldDiff)1 BufferedWriter (java.io.BufferedWriter)1 OutputStream (java.io.OutputStream)1 OutputStreamWriter (java.io.OutputStreamWriter)1 PrintWriter (java.io.PrintWriter)1 Writer (java.io.Writer)1 HashMap (java.util.HashMap)1 List (java.util.List)1 StringTokenizer (java.util.StringTokenizer)1 CSVFormat (org.apache.commons.csv.CSVFormat)1 CSVPrinter (org.apache.commons.csv.CSVPrinter)1