Search in sources :

Example 51 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.

the class EntityExtractionFilter method readEntityDictionary.

/**
 * Loads an entity dictionary from a CSV file into {@code dictionaryMap}.
 *
 * <p>The file is parsed as RFC 4180 CSV with the first record used as header; the
 * {@code name} and {@code url} columns are read from every record. Each name is split
 * with a default {@link StringTokenizer} (whitespace delimiters) and every leading
 * phrase of the name ("a", "a b", "a b c", ...) is stored in the map found at index
 * {@code tokenCount} of {@code dictionaryMap}, keyed by the normalized phrase. When a
 * key is already present, the stored entity is replaced only if the new entity has a
 * shorter name.
 *
 * @param filename file name, resolved against {@code getBaseDir()}
 * @param charset  name of the charset used to decode the file
 * @param type     type assigned to every entity created from this file
 * @throws RuntimeException wrapping any {@link IOException} raised while opening,
 *                          reading or closing the file
 */
void readEntityDictionary(String filename, String charset, String type) {
    String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
    CSVFormat format = CSVFormat.RFC4180.withHeader();
    // try-with-resources guarantees the stream, reader and parser are released even when
    // parsing or a column lookup fails part-way through the file.
    try (InputStream in = IOUtils.getInputStream(absoluteFilename);
        java.io.Reader reader = new InputStreamReader(in, charset);
        CSVParser parser = format.parse(reader)) {
        for (CSVRecord record : parser.getRecords()) {
            String entityName = record.get("name");
            String entityUrl = record.get("url");
            StringTokenizer tokenizer = new StringTokenizer(entityName);
            int tokenCount = 0;
            StringBuilder tokenPhrase = new StringBuilder();
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // Grow the per-length list lazily so that index tokenCount always exists.
                if (dictionaryMap.size() < tokenCount + 1) {
                    dictionaryMap.add(new HashMap<String, Entity>());
                }
                if (tokenCount > 0) {
                    tokenPhrase.append(" ");
                }
                tokenPhrase.append(token);
                Map<String, Entity> entityMap = dictionaryMap.get(tokenCount);
                // The second constructor argument is true only for the last token,
                // i.e. when the phrase is the complete entity name.
                Entity newEntity = new Entity(entityName, !tokenizer.hasMoreTokens());
                newEntity.setUrl(entityUrl);
                newEntity.setType(type);
                String key = normalize(tokenPhrase.toString());
                Entity entity = entityMap.get(key);
                // Keep the entity with the shortest name for each phrase.
                if (entity == null || newEntity.getName().length() < entity.getName().length()) {
                    entityMap.put(key, newEntity);
                }
                tokenCount++;
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) IOException(java.io.IOException) CSVParser(org.apache.commons.csv.CSVParser) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 52 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.

the class SynonymNormalizationFilter method init.

/**
 * Reads the filter configuration and fills the synonym lookup tables from a CSV file.
 *
 * <p>The file named by the {@code filename} property is resolved against
 * {@code getBaseDir()} and parsed as comma-delimited RFC 4180 CSV, decoded as UTF-8.
 * Column 0 of each record is the main word; column 1 is split with
 * {@code arrayDelimiter} (used as a regular expression by {@link String#split}) and
 * every non-empty part is mapped to the main word in {@code synonymLookup}. The main
 * word itself is added to {@code mainWordLookup}.
 *
 * @throws RuntimeException wrapping any exception raised while reading the file
 */
@Override
public void init() {
    fieldSynonym = getProperty("fieldSynonym", null);
    arrayDelimiter = getProperty("arrayDelimiter", ";");
    mustExist = getPropertyAsBoolean("mustExist", false);
    synonymLookup = new HashMap<String, String>();
    mainWordLookup = new HashSet<String>();
    InputStream in = null;
    try {
        String configuredName = getProperty("filename", null);
        String csvPath = IOUtils.getAbsoluteFile(getBaseDir(), configuredName);
        in = IOUtils.getInputStream(csvPath);
        java.io.Reader csvReader = new InputStreamReader(in, StandardCharsets.UTF_8.name());
        CSVParser parser = CSVFormat.RFC4180.withDelimiter(',').parse(csvReader);
        for (CSVRecord row : parser) {
            try {
                String mainWord = row.get(0);
                for (String synonym : row.get(1).split(arrayDelimiter)) {
                    if (StringUtils.isNotEmpty(synonym)) {
                        synonymLookup.put(synonym, mainWord);
                    }
                }
                mainWordLookup.add(mainWord);
            } catch (IllegalArgumentException ignored) {
                // A record that raises IllegalArgumentException is skipped on purpose.
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Best effort: a failure while closing the input is not reported.
            }
        }
    }
    super.init();
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) IOException(java.io.IOException) CSVParser(org.apache.commons.csv.CSVParser) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 53 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.

the class BlacklistTopicFilter method init.

/**
 * Builds the topic-to-values table from a CSV file and reads the field properties.
 *
 * <p>The file named by the {@code filename} property is resolved against
 * {@code getBaseDir()} and parsed as comma-delimited RFC 4180 CSV with a header
 * record, decoded as UTF-8. For every data record, column 0 is the topic and the
 * lower-cased column 1 is added to that topic's value set in {@code topicValues}.
 *
 * @throws RuntimeException wrapping any exception raised while reading the file
 */
@Override
public void init() {
    topicValues = new HashMap<String, HashSet<String>>();
    InputStream in = null;
    try {
        String configuredName = getProperty("filename", null);
        String csvPath = IOUtils.getAbsoluteFile(getBaseDir(), configuredName);
        in = IOUtils.getInputStream(csvPath);
        java.io.Reader csvReader = new InputStreamReader(in, StandardCharsets.UTF_8.name());
        CSVParser parser = CSVFormat.RFC4180.withHeader().withDelimiter(',').parse(csvReader);
        for (CSVRecord row : parser) {
            String topic = row.get(0);
            String value = row.get(1).toLowerCase();
            // Fetch the topic's bucket, creating it the first time the topic is seen.
            HashSet<String> bucket = topicValues.get(topic);
            if (bucket == null) {
                bucket = new HashSet<String>();
                topicValues.put(topic, bucket);
            }
            bucket.add(value);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Best effort: a failure while closing the input is not reported.
            }
        }
    }
    fieldTopic = getProperty("fieldTopic", null);
    fieldValue = getProperty("fieldValue", null);
    super.init();
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) IOException(java.io.IOException) CSVParser(org.apache.commons.csv.CSVParser) CSVFormat(org.apache.commons.csv.CSVFormat) CSVRecord(org.apache.commons.csv.CSVRecord)

Example 54 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project logging-log4j2 by apache.

the class CsvLogEventLayout method toSerializable.

/**
 * Renders a log event as one CSV record.
 *
 * <p>The event's fields are written to the builder returned by
 * {@code getStringBuilder()} in a fixed column order using the format returned by
 * {@code getFormat()}, followed by the format's record terminator.
 *
 * @param event the event to serialize
 * @return the CSV record; if writing fails, the failure is logged to the status
 *         logger and the format's comment marker followed by a space and the
 *         exception text is returned instead
 */
@Override
public String toSerializable(final LogEvent event) {
    final StringBuilder buffer = getStringBuilder();
    final CSVFormat format = getFormat();
    try {
        // Column order is part of the output contract; do not reorder.
        final Object[] columns = {
            event.getNanoTime(),
            event.getTimeMillis(),
            event.getLevel(),
            event.getThreadId(),
            event.getThreadName(),
            event.getThreadPriority(),
            event.getMessage().getFormattedMessage(),
            event.getLoggerFqcn(),
            event.getLoggerName(),
            event.getMarker(),
            event.getThrownProxy(),
            event.getSource(),
            event.getContextData(),
            event.getContextStack()
        };
        for (int column = 0; column < columns.length; column++) {
            // Only the first value is flagged as starting a new record.
            format.print(columns[column], buffer, column == 0);
        }
        format.println(buffer);
        return buffer.toString();
    } catch (final IOException e) {
        StatusLogger.getLogger().error(event.toString(), e);
        return format.getCommentMarker() + " " + e;
    }
}
Also used : CSVFormat(org.apache.commons.csv.CSVFormat) IOException(java.io.IOException)

Example 55 with CSVFormat

use of org.apache.commons.csv.CSVFormat in project symja_android_library by axkr.

the class ElementPreprocessor method main.

/**
 * Converts the tab-separated element data file into Java source text on standard output.
 *
 * <p>Every record becomes a list: empty cells are skipped, the markers
 * {@code Not_applicable}, {@code Not_available} and {@code Not_known} (case-insensitive)
 * become the corresponding {@code F.Missing(...)} expressions, cells that parse to a
 * list or a real number are appended as expressions, and all other cells are appended
 * as strings (with one surrounding character stripped from each end when the cell
 * starts with a double quote). Rows from index 2 onwards are then printed via
 * {@code internalJavaString}, each followed by {@code ", "}.
 *
 * @param args optional; when {@code args[0]} is present it is used as the input file
 *             path, otherwise the default path below the user's home directory is used
 */
public static void main(String[] args) {
    F.initSymbols();
    String userHome = System.getProperty("user.home");
    String defaultFileName = userHome + "/git/symja_android_library/symja_android_library/tools/src/main/java/org/matheclipse/core/preprocessor/element.csv";
    String fileName = (args != null && args.length > 0) ? args[0] : defaultFileName;
    // NOTE(review): FileReader decodes with the platform default charset - confirm the
    // encoding of element.csv before relying on non-ASCII content.
    try (FileReader reader = new FileReader(fileName)) {
        EvalEngine engine = EvalEngine.get();
        boolean relaxedSyntax = false;
        AST2Expr ast2Expr = new AST2Expr(relaxedSyntax, engine);
        final Parser parser = new Parser(relaxedSyntax, true);
        CSVFormat csvFormat = CSVFormat.RFC4180.withDelimiter('\t');
        Iterable<CSVRecord> records = csvFormat.parse(reader);
        IASTAppendable rowList = F.ListAlloc(130);
        for (CSVRecord record : records) {
            IASTAppendable columnList = F.ListAlloc(record.size());
            for (String str : record) {
                str = str.trim();
                if (str.length() == 0) {
                    // Empty cells contribute nothing to the row.
                    continue;
                }
                if (str.equalsIgnoreCase("Not_applicable")) {
                    columnList.append(F.Missing(F.NotApplicable));
                } else if (str.equalsIgnoreCase("Not_available")) {
                    columnList.append(F.Missing(F.NotAvailable));
                } else if (str.equalsIgnoreCase("Not_known")) {
                    columnList.append(F.Missing(F.Unknown));
                } else {
                    final ASTNode node = parser.parse(str);
                    IExpr temp = ast2Expr.convert(node);
                    if (temp.isList() || temp.isReal()) {
                        columnList.append(temp);
                    } else if (str.length() > 1 && str.charAt(0) == '\"') {
                        // The length guard keeps substring(1, 0) from throwing on a lone quote.
                        columnList.append(str.substring(1, str.length() - 1));
                    } else {
                        columnList.append(str);
                    }
                }
            }
            rowList.append(columnList);
        }
        // Starts at index 2 - presumably skipping the list head and the header row; confirm.
        for (int i = 2; i < rowList.size(); i++) {
            IAST columnList = (IAST) rowList.get(i);
            System.out.print(columnList.internalJavaString(JAVA_FORM_PROPERTIES, 1, x -> null));
            System.out.println(", ");
        }
    } catch (java.io.FileNotFoundException fnfe) {
        System.out.println("Import: file not found!");
    } catch (IOException ioe) {
        // Any other I/O failure is reported as such instead of as a missing file.
        System.out.println("Import: error reading " + fileName + ": " + ioe.getMessage());
    }
}
Also used : EvalEngine(org.matheclipse.core.eval.EvalEngine) IASTAppendable(org.matheclipse.core.interfaces.IASTAppendable) F(org.matheclipse.core.expression.F) IAST(org.matheclipse.core.interfaces.IAST) ASTNode(org.matheclipse.parser.client.ast.ASTNode) CSVRecord(org.apache.commons.csv.CSVRecord) IOException(java.io.IOException) Prefix(org.matheclipse.core.interfaces.IExpr.SourceCodeProperties.Prefix) SourceCodeProperties(org.matheclipse.core.interfaces.IExpr.SourceCodeProperties) CSVFormat(org.apache.commons.csv.CSVFormat) IExpr(org.matheclipse.core.interfaces.IExpr) FileReader(java.io.FileReader) AST2Expr(org.matheclipse.core.convert.AST2Expr) Parser(org.matheclipse.parser.client.Parser)

Aggregations

CSVFormat (org.apache.commons.csv.CSVFormat)57 CSVRecord (org.apache.commons.csv.CSVRecord)22 IOException (java.io.IOException)21 CSVParser (org.apache.commons.csv.CSVParser)19 ArrayList (java.util.ArrayList)15 StringReader (java.io.StringReader)13 InputStream (java.io.InputStream)9 InputStreamReader (java.io.InputStreamReader)8 HashMap (java.util.HashMap)8 CSVPrinter (org.apache.commons.csv.CSVPrinter)8 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)8 RecordField (org.apache.nifi.serialization.record.RecordField)8 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)8 Test (org.junit.Test)8 ByteArrayOutputStream (java.io.ByteArrayOutputStream)7 Reader (java.io.Reader)7 LinkedHashMap (java.util.LinkedHashMap)7 SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute)7 MapRecord (org.apache.nifi.serialization.record.MapRecord)7 Record (org.apache.nifi.serialization.record.Record)7