use of com.univocity.parsers.tsv.TsvParserSettings in project Orchid by JavaEden.
the class CSVParser method parse.
/**
 * Parses delimited text into a JSON structure.
 *
 * <p>The first parsed row supplies the column names. Every following row becomes a
 * {@link JSONObject} keyed by those names, and all row objects are collected in a
 * {@link JSONArray} stored under the key {@code "list"}.
 *
 * @param extension {@code "csv"} (case-insensitive) selects the CSV parser; any other value
 *     selects the TSV parser
 * @param input the raw delimited text, with {@code \n} as line separator
 * @return a {@link JSONElement} wrapping an object whose {@code "list"} entry holds one object
 *     per data row; the list is empty when the input has no header row
 */
@Override
public JSONElement parse(String extension, String input) {
    List<String[]> allRows;
    // A Reader is handed to the parser directly: going through an InputStream would encode and
    // decode the text with the platform default charset and could corrupt characters.
    if (extension.equalsIgnoreCase("csv")) {
        CsvParserSettings settings = new CsvParserSettings();
        settings.getFormat().setLineSeparator("\n");
        CsvParser parser = new CsvParser(settings);
        allRows = parser.parseAll(new java.io.StringReader(input));
    } else {
        TsvParserSettings settings = new TsvParserSettings();
        settings.getFormat().setLineSeparator("\n");
        TsvParser parser = new TsvParser(settings);
        allRows = parser.parseAll(new java.io.StringReader(input));
    }

    JSONArray array = new JSONArray();
    // Empty input yields no rows at all, so there is no header to read.
    if (!allRows.isEmpty()) {
        String[] cols = allRows.get(0);
        for (int i = 1; i < allRows.size(); i++) {
            String[] row = allRows.get(i);
            JSONObject rowObject = new JSONObject();
            // Rows may be shorter than the header; only copy the cells that exist.
            int width = Math.min(cols.length, row.length);
            for (int j = 0; j < width; j++) {
                rowObject.put(cols[j], row[j]);
            }
            array.put(rowObject);
        }
    }

    JSONObject object = new JSONObject();
    object.put("list", array);
    return new JSONElement(object);
}
use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.
the class Main method loadStandardPrefixes.
/**
 * Loads all standard prefixes.
 *
 * <p>Reads {@code parserSettings/standardPrefixes.tsv} (the header row is skipped). For every
 * row with exactly three columns, column 0 is stored in {@code prefixes} mapped to column 1,
 * and column 0 is additionally added to {@code simpleQueryWhitelist} when column 2 equals
 * {@code "simple"}. Afterwards all entries of {@code prefixes} are appended to
 * {@code prefixList}, which is then sorted by descending value length.
 */
public static void loadStandardPrefixes() {
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);

    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {
        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length <= 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            if (row.length != 3) {
                logger.warn("Line with row length " + row.length + " found. Is the formatting of standardPrefixes.tsv correct?");
                return;
            }
            // Empty cells are delivered as null; skip such rows instead of failing the whole parse.
            if (row[0] == null || row[1] == null) {
                logger.warn("Ignoring line with empty prefix or uri while parsing standardPrefixes.tsv.");
                return;
            }
            String prefix = row[0].toString();
            try {
                prefixes.put(prefix, row[1].toString());
            } catch (IllegalArgumentException e) {
                logger.error("Prefix or uri for standard prefixes defined multiple times", e);
            }
            // Null-safe: an empty third column simply means "not simple".
            if (row[2] != null && "simple".equals(row[2].toString())) {
                simpleQueryWhitelist.add(prefix);
            }
        }
    };
    parserSettings.setProcessor(rowProcessor);

    TsvParser parser = new TsvParser(parserSettings);
    try {
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        parser.parse(new InputStreamReader(new FileInputStream("parserSettings/standardPrefixes.tsv"), java.nio.charset.StandardCharsets.UTF_8));
    } catch (FileNotFoundException e) {
        logger.error("Could not open configuration file for standard prefixes.", e);
    }

    prefixList.addAll(Main.prefixes.entrySet());
    // Longest values first.
    Collections.sort(prefixList, new Comparator<Map.Entry<String, String>>() {
        @Override
        public int compare(Entry<String, String> arg0, Entry<String, String> arg1) {
            return Integer.compare(arg1.getValue().length(), arg0.getValue().length());
        }
    });
}
use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.
the class Main method loadPreBuildQueryTypes.
/**
 * Loads all pre-build query types.
 *
 * <p>Iterates over the regular files in the directory {@code preBuildQueryTypeFiles}:
 * <ul>
 *   <li>{@code *.preBuildQueryType} files are read as a query string and handed to an
 *       {@link OpenRDFQueryHandler}. If the query is valid and has a normalized form, the MD5
 *       of its tuple expression dump is registered in one of the {@code queryTypes} maps
 *       under the file's base name (file name without extension).</li>
 *   <li>{@code *.tsv} files are parsed row by row; five-column rows are stored in
 *       {@code queryTypeToToolMapping} as (column 0, column 1) -> (column 2, column 3).</li>
 * </ul>
 * I/O failures are logged and end the loading.
 */
public static void loadPreBuildQueryTypes() {
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get("preBuildQueryTypeFiles"))) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // Work on the last path element so the result does not depend on the
            // platform's path separator.
            String fileName = filePath.getFileName().toString();

            if (fileName.endsWith(".preBuildQueryType")) {
                // Decode explicitly as UTF-8 instead of the platform default charset.
                String queryString = new String(readAllBytes(filePath), java.nio.charset.StandardCharsets.UTF_8);
                OpenRDFQueryHandler queryHandler = new OpenRDFQueryHandler(QueryHandler.Validity.DEFAULT, -1L, -1, queryString, "preBuildQueryTypes", "", -1);
                if (queryHandler.getValidityStatus() != QueryHandler.Validity.VALID) {
                    logger.info("The Pre-build query " + filePath + " is no valid SPARQL");
                    continue;
                }
                ParsedQuery normalizedPreBuildQuery = queryHandler.getNormalizedQuery();
                String queryTypeName = fileName.substring(0, fileName.lastIndexOf("."));
                if (normalizedPreBuildQuery != null) {
                    String queryDump = normalizedPreBuildQuery.getTupleExpr().toString();
                    byte[] md5 = DigestUtils.md5(queryDump);
                    // The dump's hash code selects which of the maps holds this entry.
                    int index = Math.floorMod(queryDump.hashCode(), numberOfQueryTypeDiskMaps);
                    if (queryTypes[index].containsKey(md5)) {
                        String existingName = queryTypes[index].get(md5);
                        if (!existingName.equals(queryTypeName)) {
                            logger.info(existingName + " is duplicate of " + queryTypeName);
                        }
                    }
                    queryTypes[index].put(md5, queryTypeName);
                } else {
                    logger.info("Pre-build query " + queryTypeName + " could not be parsed.");
                }
            }

            if (fileName.endsWith(".tsv")) {
                TsvParserSettings parserSettings = new TsvParserSettings();
                parserSettings.setLineSeparatorDetectionEnabled(true);
                parserSettings.setHeaderExtractionEnabled(true);
                parserSettings.setSkipEmptyLines(true);
                parserSettings.setReadInputOnSeparateThread(true);
                ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {
                    @Override
                    public void rowProcessed(Object[] row, ParsingContext parsingContext) {
                        if (row.length <= 1) {
                            logger.warn("Ignoring line without tab while parsing.");
                            return;
                        }
                        if (row.length == 5) {
                            // Only the first four columns are used; the fifth is ignored here.
                            queryTypeToToolMapping.put(new Tuple2<>(row[0].toString(), row[1].toString()), new Tuple2<>(row[2].toString(), row[3].toString()));
                            return;
                        }
                        logger.warn("Line with row length " + row.length + " found. Is the formatting of toolMapping.tsv correct?");
                    }
                };
                parserSettings.setProcessor(rowProcessor);
                TsvParser parser = new TsvParser(parserSettings);
                parser.parse(filePath.toFile());
            }
        }
    } catch (IOException e) {
        // Name the directory that is actually opened above.
        logger.error("Could not read from directory preBuildQueryTypeFiles", e);
    }
}
use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.
the class Main method loadToolNamesForUserCategory.
/**
 * Loads all user agents that should be in the user source category.
 *
 * <p>Reads {@code userAgentClassification/toolNameForUserCategory.tsv} (the header row is
 * skipped) and adds the value of every single-column row to
 * {@code sourceCategoryUserToolName}. Rows with any other number of columns are logged and
 * ignored.
 */
private static void loadToolNamesForUserCategory() {
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);

    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {
        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length < 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            if (row.length != 1) {
                logger.warn("Line with row length " + row.length + " found. Is the formatting of toolNameForUserCategory.tsv correct?");
                return;
            }
            // An empty cell is delivered as null; there is no tool name to register then.
            if (row[0] == null) {
                logger.warn("Ignoring line with empty tool name while parsing toolNameForUserCategory.tsv.");
                return;
            }
            sourceCategoryUserToolName.add(row[0].toString());
        }
    };
    parserSettings.setProcessor(rowProcessor);

    TsvParser parser = new TsvParser(parserSettings);
    try {
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        parser.parse(new InputStreamReader(new FileInputStream("userAgentClassification/toolNameForUserCategory.tsv"), java.nio.charset.StandardCharsets.UTF_8));
    } catch (FileNotFoundException e) {
        // Message names the file this method actually reads.
        logger.error("Could not open configuration file for tool names of the user category.", e);
    }
}
use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.
the class InputHandlerTSV method parseTo.
/**
 * Reads the file given by reader and hands the data to the outputHandler.
 *
 * <p>For every row with more than one column, column 0 is decoded into a query string and
 * validity, columns 2 and 3 are used as user agent and time stamp (the literal
 * {@code "null"} when the cell is missing or empty), and everything is passed to
 * {@link OutputHandler#writeLine}. After parsing, the output handler's files and the
 * preprocessed writer (if any) are closed.
 *
 * @param outputHandler Handles the data that should be written.
 * @param day           Passed through unchanged to every {@code writeLine} call.
 */
public final void parseTo(final OutputHandler outputHandler, int day) {
    // read in queries from .tsv
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);

    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {
        /**
         * Returns the cell at {@code index} as text, or the literal "null" when the row is
         * too short or the cell is empty.
         */
        private String cellOrNullLiteral(Object[] row, int index) {
            if (index < row.length && row[index] != null) {
                return row[index].toString();
            }
            return "null";
        }

        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length <= 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            Tuple2<String, QueryHandler.Validity> queryTuple = decode(row[0].toString(), inputFile, parsingContext.currentLine());
            String queryString = queryTuple._1;
            QueryHandler.Validity validity = queryTuple._2;

            // Rows with only two or three columns are accepted by the guard above, so the
            // optional columns must be read with a bounds check.
            String userAgent = cellOrNullLiteral(row, 2);
            String timeStamp = cellOrNullLiteral(row, 3);

            Long line = parsingContext.currentLine();
            try {
                outputHandler.writeLine(queryString, validity, userAgent, timeStamp, line, day, inputFile);
            } catch (NullPointerException e) {
                // Still record the line, flagged as an internal error, so it is not lost.
                outputHandler.writeLine("", QueryHandler.Validity.INTERNAL_ERROR, userAgent, timeStamp, line, day, inputFile);
                logger.error("Unexpected Null Pointer Exception in writeLine.", e);
            }
        }
    };
    parserSettings.setProcessor(rowProcessor);

    TsvParser parser = new TsvParser(parserSettings);
    parser.parse(reader);
    outputHandler.closeFiles();
    if (preprocessedWriter != null) {
        preprocessedWriter.close();
    }
}
Aggregations