Use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.
From the class EntityExtractionFilter, method readEntityDictionary:
void readEntityDictionary(String filename, String charset, String type) {
    String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
    // RFC 4180 CSV; the first record is used as the header row.
    CSVFormat format = CSVFormat.RFC4180.withHeader();
    try (InputStream in = IOUtils.getInputStream(absoluteFilename);
            java.io.Reader reader = new InputStreamReader(in, charset)) {
        CSVParser parser = format.parse(reader);
        for (CSVRecord record : parser.getRecords()) {
            String entityName = record.get("name");
            String entityUrl = record.get("url");
            StringTokenizer tokenizer = new StringTokenizer(entityName);
            int tokenCount = 0;
            StringBuilder tokenPhrase = new StringBuilder();
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // Grow the per-position dictionary list so index tokenCount exists.
                if (dictionaryMap.size() < tokenCount + 1) {
                    dictionaryMap.add(new HashMap<String, Entity>());
                }
                if (tokenCount > 0) {
                    tokenPhrase.append(" ");
                }
                tokenPhrase.append(token);
                Map<String, Entity> entityMap = dictionaryMap.get(tokenCount);
                // The entity is marked complete when this is the last token of its name.
                Entity newEntity = new Entity(entityName, !tokenizer.hasMoreTokens());
                newEntity.setUrl(entityUrl);
                newEntity.setType(type);
                String key = normalize(tokenPhrase.toString());
                Entity entity = entityMap.get(key);
                if (entity == null) {
                    entityMap.put(key, newEntity);
                } else if (newEntity.getName().length() < entity.getName().length()) {
                    // Prefer the entity with the shorter name for the same key.
                    entityMap.put(key, newEntity);
                }
                tokenCount++;
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
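For comparison, here is a minimal, self-contained sketch of the same header-based parsing pattern with Commons CSV; the file name entities.csv and the columns name and url are illustrative assumptions, not part of the project above.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class HeaderParseSketch {
    public static void main(String[] args) throws Exception {
        // entities.csv is assumed to start with a header row: name,url
        try (Reader reader = Files.newBufferedReader(Paths.get("entities.csv"));
             CSVParser parser = CSVFormat.RFC4180.withHeader().parse(reader)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get("name") + " -> " + record.get("url"));
            }
        }
    }
}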
Use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.
From the class SynonymNormalizationFilter, method init:
@Override
public void init() {
    fieldSynonym = getProperty("fieldSynonym", null);
    arrayDelimiter = getProperty("arrayDelimiter", ";");
    mustExist = getPropertyAsBoolean("mustExist", false);
    synonymLookup = new HashMap<String, String>();
    mainWordLookup = new HashSet<String>();
    InputStream in = null;
    try {
        String filename = getProperty("filename", null);
        String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
        in = IOUtils.getInputStream(absoluteFilename);
        java.io.Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8.name());
        CSVFormat format = CSVFormat.RFC4180.withDelimiter(',');
        CSVParser parser = format.parse(reader);
        Iterator<CSVRecord> csvIterator = parser.iterator();
        while (csvIterator.hasNext()) {
            CSVRecord record = csvIterator.next();
            try {
                String mainWord = record.get(0);
                String[] synonyms = record.get(1).split(arrayDelimiter);
                if (synonyms != null && synonyms.length > 0) {
                    for (String synonym : synonyms) {
                        if (StringUtils.isNotEmpty(synonym)) {
                            synonymLookup.put(synonym, mainWord);
                        }
                    }
                }
                mainWordLookup.add(mainWord);
            } catch (IllegalArgumentException ignored) {
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
            }
        }
    }
    super.init();
}
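The method above maps every synonym in a delimiter-separated cell back to its main word. A reduced sketch of that pattern follows; the file name synonyms.csv and the fixed ";" in-cell delimiter are assumptions for illustration.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SynonymLookupSketch {
    public static void main(String[] args) throws Exception {
        Map<String, String> synonymLookup = new HashMap<>();
        // Each row is assumed to look like: mainWord,synonym1;synonym2;...
        try (Reader reader = Files.newBufferedReader(Paths.get("synonyms.csv"));
             CSVParser parser = CSVFormat.RFC4180.parse(reader)) {
            for (CSVRecord record : parser) {
                if (record.size() < 2) {
                    continue; // skip rows without a synonym column
                }
                String mainWord = record.get(0);
                for (String synonym : record.get(1).split(";")) {
                    if (!synonym.isEmpty()) {
                        synonymLookup.put(synonym, mainWord);
                    }
                }
            }
        }
        System.out.println(synonymLookup);
    }
}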
Use of org.apache.commons.csv.CSVFormat in project solr-cmd-utils by tblsoft.
From the class BlacklistTopicFilter, method init:
@Override
public void init() {
    topicValues = new HashMap<String, HashSet<String>>();
    InputStream in = null;
    try {
        String filename = getProperty("filename", null);
        String absoluteFilename = IOUtils.getAbsoluteFile(getBaseDir(), filename);
        in = IOUtils.getInputStream(absoluteFilename);
        java.io.Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8.name());
        CSVFormat format = CSVFormat.RFC4180.withHeader().withDelimiter(',');
        CSVParser parser = format.parse(reader);
        Iterator<CSVRecord> csvIterator = parser.iterator();
        while (csvIterator.hasNext()) {
            CSVRecord record = csvIterator.next();
            String topic = record.get(0);
            String value = record.get(1).toLowerCase();
            if (!topicValues.containsKey(topic)) {
                topicValues.put(topic, new HashSet<String>());
            }
            topicValues.get(topic).add(value);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
            }
        }
    }
    fieldTopic = getProperty("fieldTopic", null);
    fieldValue = getProperty("fieldValue", null);
    super.init();
}
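The same topic-to-values grouping can also be written with the parsed records and the Java stream API instead of the explicit containsKey check. This is only a sketch of that alternative, assuming a two-column file blacklist.csv with a header row, matching the layout used above.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class TopicGroupingSketch {
    public static void main(String[] args) throws Exception {
        try (Reader reader = Files.newBufferedReader(Paths.get("blacklist.csv"));
             CSVParser parser = CSVFormat.RFC4180.withHeader().parse(reader)) {
            // Group lower-cased values by their topic column.
            Map<String, Set<String>> topicValues = parser.getRecords().stream()
                    .collect(Collectors.groupingBy(
                            record -> record.get(0),
                            Collectors.mapping(record -> record.get(1).toLowerCase(), Collectors.toSet())));
            System.out.println(topicValues);
        }
    }
}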
Use of org.apache.commons.csv.CSVFormat in project logging-log4j2 by apache.
From the class CsvLogEventLayout, method toSerializable:
@Override
public String toSerializable(final LogEvent event) {
    final StringBuilder buffer = getStringBuilder();
    final CSVFormat format = getFormat();
    try {
        format.print(event.getNanoTime(), buffer, true);
        format.print(event.getTimeMillis(), buffer, false);
        format.print(event.getLevel(), buffer, false);
        format.print(event.getThreadId(), buffer, false);
        format.print(event.getThreadName(), buffer, false);
        format.print(event.getThreadPriority(), buffer, false);
        format.print(event.getMessage().getFormattedMessage(), buffer, false);
        format.print(event.getLoggerFqcn(), buffer, false);
        format.print(event.getLoggerName(), buffer, false);
        format.print(event.getMarker(), buffer, false);
        format.print(event.getThrownProxy(), buffer, false);
        format.print(event.getSource(), buffer, false);
        format.print(event.getContextData(), buffer, false);
        format.print(event.getContextStack(), buffer, false);
        format.println(buffer);
        return buffer.toString();
    } catch (final IOException e) {
        StatusLogger.getLogger().error(event.toString(), e);
        return format.getCommentMarker() + " " + e;
    }
}
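The layout relies on CSVFormat.print(Object, Appendable, boolean): passing true for the first value starts a new record, and false for the following values makes the format emit the delimiter (and quoting where needed) before each one. A minimal sketch of that writing pattern, independent of Log4j and assuming Commons CSV 1.4 or later:

import org.apache.commons.csv.CSVFormat;

public class CsvPrintSketch {
    public static void main(String[] args) throws Exception {
        StringBuilder buffer = new StringBuilder();
        CSVFormat format = CSVFormat.DEFAULT;
        format.print("2023-01-01", buffer, true);    // first value of the record
        format.print("INFO", buffer, false);         // delimiter is written before the value
        format.print("hello, world", buffer, false); // quoted because it contains the delimiter
        format.println(buffer);                      // terminates the record
        System.out.print(buffer);
    }
}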
Use of org.apache.commons.csv.CSVFormat in project symja_android_library by axkr.
From the class ElementPreprocessor, method main:
public static void main(String[] args) {
    F.initSymbols();
    FileReader reader = null;
    try {
        EvalEngine engine = EvalEngine.get();
        boolean relaxedSyntax = false;
        String userHome = System.getProperty("user.home");
        String fileName = userHome + "/git/symja_android_library/symja_android_library/tools/src/main/java/org/matheclipse/core/preprocessor/element.csv";
        reader = new FileReader(fileName);
        AST2Expr ast2Expr = new AST2Expr(relaxedSyntax, engine);
        final Parser parser = new Parser(relaxedSyntax, true);
        // element.csv is tab-separated, so the delimiter is overridden.
        CSVFormat csvFormat = CSVFormat.RFC4180.withDelimiter('\t');
        Iterable<CSVRecord> records = csvFormat.parse(reader);
        IASTAppendable rowList = F.ListAlloc(130);
        for (CSVRecord record : records) {
            IASTAppendable columnList = F.ListAlloc(record.size());
            for (String str : record) {
                str = str.trim();
                if (str.length() == 0) {
                    // columnList.append(F.Null);
                } else if (str.equalsIgnoreCase("Not_applicable")) {
                    columnList.append(F.Missing(F.NotApplicable));
                } else if (str.equalsIgnoreCase("Not_available")) {
                    columnList.append(F.Missing(F.NotAvailable));
                } else if (str.equalsIgnoreCase("Not_known")) {
                    columnList.append(F.Missing(F.Unknown));
                } else {
                    final ASTNode node = parser.parse(str);
                    IExpr temp = ast2Expr.convert(node);
                    if (temp.isList() || temp.isReal()) {
                        columnList.append(temp);
                    } else {
                        if (str.charAt(0) == '\"') {
                            // strip the surrounding double quotes
                            columnList.append(str.substring(1, str.length() - 1));
                        } else {
                            columnList.append(str);
                        }
                    }
                }
            }
            rowList.append(columnList);
        }
        // Symja AST arguments are 1-based; starting at 2 skips the first row of the file.
        for (int i = 2; i < rowList.size(); i++) {
            IAST columnList = (IAST) rowList.get(i);
            System.out.print(columnList.internalJavaString(JAVA_FORM_PROPERTIES, 1, x -> null));
            System.out.println(", ");
        }
        // return rowList;
    } catch (IOException ioe) {
        System.out.println("Import: file not found!");
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
            }
        }
    }
}
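The same tab-delimited parsing can be exercised on its own. This sketch assumes a small tab-separated file named elements.tsv and simply echoes each trimmed cell, relying on CSVParser and CSVRecord both being Iterable.

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class TsvParseSketch {
    public static void main(String[] args) throws Exception {
        CSVFormat tsv = CSVFormat.RFC4180.withDelimiter('\t');
        try (Reader reader = Files.newBufferedReader(Paths.get("elements.tsv"));
             CSVParser parser = tsv.parse(reader)) {
            for (CSVRecord record : parser) {      // CSVParser is Iterable<CSVRecord>
                for (String cell : record) {       // CSVRecord is Iterable<String>
                    System.out.print("[" + cell.trim() + "]");
                }
                System.out.println();
            }
        }
    }
}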