Search in sources :

Example 1 with TsvParser

use of com.univocity.parsers.tsv.TsvParser in project Orchid by JavaEden.

the class CSVParser method parse.

/**
 * Parses delimited text into a JSON object of the form {@code {"list": [ {...}, ... ]}}.
 *
 * <p>The first parsed row supplies the keys; each following row becomes one JSON object
 * that maps those keys to the row's values. The input is parsed as CSV when
 * {@code extension} equals "csv" (ignoring case) and as TSV otherwise.
 *
 * @param extension file extension used to select the parser
 * @param input the delimited text; lines are expected to be separated by "\n"
 * @return a {@code JSONElement} wrapping an object whose "list" entry holds one object per
 *         data row; the list is empty when the input has no rows or only a header row
 */
@Override
public JSONElement parse(String extension, String input) {
    List<String[]> allRows;
    // Read the String through a Reader: going through a byte stream would encode and
    // decode it with the platform default charset and can corrupt non-ASCII text.
    if (extension.equalsIgnoreCase("csv")) {
        CsvParserSettings settings = new CsvParserSettings();
        settings.getFormat().setLineSeparator("\n");
        CsvParser parser = new CsvParser(settings);
        allRows = parser.parseAll(new java.io.StringReader(input));
    } else {
        TsvParserSettings settings = new TsvParserSettings();
        settings.getFormat().setLineSeparator("\n");
        TsvParser parser = new TsvParser(settings);
        allRows = parser.parseAll(new java.io.StringReader(input));
    }
    JSONArray array = new JSONArray();
    // Guard against input without any rows; there is no header row to read in that case.
    if (!allRows.isEmpty()) {
        String[] cols = allRows.get(0);
        for (int i = 1; i < allRows.size(); i++) {
            String[] row = allRows.get(i);
            // A data row may hold fewer cells than the header row; only read what exists.
            int width = Math.min(cols.length, row.length);
            JSONObject object = new JSONObject();
            for (int j = 0; j < width; j++) {
                // JSONObject.put rejects null keys, so columns without a header name are skipped.
                if (cols[j] != null) {
                    object.put(cols[j], row[j]);
                }
            }
            array.put(object);
        }
    }
    JSONObject object = new JSONObject();
    object.put("list", array);
    return new JSONElement(object);
}
Also used : CsvParserSettings(com.univocity.parsers.csv.CsvParserSettings) JSONObject(org.json.JSONObject) JSONElement(com.eden.common.json.JSONElement) JSONArray(org.json.JSONArray) CsvParser(com.univocity.parsers.csv.CsvParser) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 2 with TsvParser

use of com.univocity.parsers.tsv.TsvParser in project QueryAnalysis by Wikidata.

the class Main method loadStandardPrefixes.

/**
 * Loads all standard prefixes from {@code parserSettings/standardPrefixes.tsv}.
 *
 * <p>Every three-column row adds a mapping from column 0 to column 1 to {@code prefixes};
 * when column 2 equals "simple", column 0 is additionally added to
 * {@code simpleQueryWhitelist}. Rows of any other width, or with an empty first or second
 * cell, are logged and skipped. Afterwards all entries of {@code prefixes} are appended to
 * {@code prefixList}, which is then sorted by descending value length.
 *
 * <p>A missing file is logged and otherwise ignored.
 */
public static void loadStandardPrefixes() {
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);
    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length <= 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            if (row.length != 3) {
                logger.warn("Line with row length " + row.length + " found. Is the formatting of standardPrefixes.tsv correct?");
                return;
            }
            // Empty cells arrive as null; skip the row instead of failing on toString().
            if (row[0] == null || row[1] == null) {
                logger.warn("Ignoring line with empty prefix or uri while parsing.");
                return;
            }
            String prefix = row[0].toString();
            try {
                prefixes.put(prefix, row[1].toString());
            } catch (IllegalArgumentException e) {
                logger.error("Prefix or uri for standard prefixes defined multiple times", e);
            }
            // An empty third cell simply means the prefix is not whitelisted.
            if (row[2] != null && row[2].toString().equals("simple")) {
                simpleQueryWhitelist.add(prefix);
            }
        }
    };
    parserSettings.setProcessor(rowProcessor);
    TsvParser parser = new TsvParser(parserSettings);
    try {
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        parser.parse(new InputStreamReader(new FileInputStream("parserSettings/standardPrefixes.tsv"), java.nio.charset.StandardCharsets.UTF_8));
    } catch (FileNotFoundException e) {
        logger.error("Could not open configuration file for standard prefixes.", e);
    }
    prefixList.addAll(Main.prefixes.entrySet());
    Collections.sort(prefixList, new Comparator<Map.Entry<String, String>>() {

        @Override
        public int compare(Entry<String, String> arg0, Entry<String, String> arg1) {
            // Arguments are swapped on purpose: longer values sort first.
            return Integer.compare(arg1.getValue().length(), arg0.getValue().length());
        }
    });
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) Entry(java.util.Map.Entry) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 3 with TsvParser

use of com.univocity.parsers.tsv.TsvParser in project QueryAnalysis by Wikidata.

the class Main method loadPreBuildQueryTypes.

/**
 * Loads all pre-build query types from the directory {@code preBuildQueryTypeFiles}.
 *
 * <p>Regular files ending in {@code .preBuildQueryType} are read as query strings. Each
 * one that validates and normalizes is registered in {@code queryTypes} under the MD5 of
 * its normalized tuple expression, named after the file without its extension. Regular
 * files ending in {@code .tsv} are parsed as five-column rows and added to
 * {@code queryTypeToToolMapping}, mapping (column 0, column 1) to (column 2, column 3).
 *
 * <p>An I/O failure while reading the directory or a query file is logged and ends the
 * loading.
 */
public static void loadPreBuildQueryTypes() {
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get("preBuildQueryTypeFiles"))) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // Use the Path API rather than searching for "/" so this also works with
            // other file-system separators.
            String fileName = filePath.getFileName().toString();
            if (fileName.endsWith(".preBuildQueryType")) {
                // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
                String queryString = new String(readAllBytes(filePath), java.nio.charset.StandardCharsets.UTF_8);
                OpenRDFQueryHandler queryHandler = new OpenRDFQueryHandler(QueryHandler.Validity.DEFAULT, -1L, -1, queryString, "preBuildQueryTypes", "", -1);
                if (queryHandler.getValidityStatus() != QueryHandler.Validity.VALID) {
                    logger.info("The Pre-build query " + filePath + " is no valid SPARQL");
                    continue;
                }
                ParsedQuery normalizedPreBuildQuery = queryHandler.getNormalizedQuery();
                String queryTypeName = fileName.substring(0, fileName.lastIndexOf('.'));
                if (normalizedPreBuildQuery == null) {
                    logger.info("Pre-build query " + queryTypeName + " could not be parsed.");
                    continue;
                }
                String queryDump = normalizedPreBuildQuery.getTupleExpr().toString();
                byte[] md5 = DigestUtils.md5(queryDump);
                // floorMod keeps the index non-negative even for negative hash codes.
                int index = Math.floorMod(queryDump.hashCode(), numberOfQueryTypeDiskMaps);
                if (queryTypes[index].containsKey(md5)) {
                    String existingName = queryTypes[index].get(md5);
                    if (!existingName.equals(queryTypeName)) {
                        logger.info(existingName + " is duplicate of " + queryTypeName);
                    }
                }
                queryTypes[index].put(md5, queryTypeName);
            } else if (fileName.endsWith(".tsv")) {
                TsvParserSettings parserSettings = new TsvParserSettings();
                parserSettings.setLineSeparatorDetectionEnabled(true);
                parserSettings.setHeaderExtractionEnabled(true);
                parserSettings.setSkipEmptyLines(true);
                parserSettings.setReadInputOnSeparateThread(true);
                ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

                    @Override
                    public void rowProcessed(Object[] row, ParsingContext parsingContext) {
                        if (row.length <= 1) {
                            logger.warn("Ignoring line without tab while parsing.");
                            return;
                        }
                        if (row.length != 5) {
                            logger.warn("Line with row length " + row.length + " found. Is the formatting of toolMapping.tsv correct?");
                            return;
                        }
                        // Empty cells arrive as null; skip the row instead of failing on toString().
                        if (row[0] == null || row[1] == null || row[2] == null || row[3] == null) {
                            logger.warn("Ignoring line with empty cell while parsing.");
                            return;
                        }
                        queryTypeToToolMapping.put(new Tuple2<>(row[0].toString(), row[1].toString()), new Tuple2<>(row[2].toString(), row[3].toString()));
                    }
                };
                parserSettings.setProcessor(rowProcessor);
                TsvParser parser = new TsvParser(parserSettings);
                parser.parse(filePath.toFile());
            }
        }
    } catch (IOException e) {
        logger.error("Could not read from directory preBuildQueryTypeFiles", e);
    }
}
Also used : Path(java.nio.file.Path) ParsingContext(com.univocity.parsers.common.ParsingContext) ParsedQuery(org.openrdf.query.parser.ParsedQuery) OpenRDFQueryHandler(query.OpenRDFQueryHandler) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 4 with TsvParser

use of com.univocity.parsers.tsv.TsvParser in project QueryAnalysis by Wikidata.

the class Main method loadToolNamesForUserCategory.

/**
 * Loads all user agents that should be in the user source category.
 *
 * <p>Reads {@code userAgentClassification/toolNameForUserCategory.tsv} and adds the value
 * of every single-column row to {@code sourceCategoryUserToolName}. Rows of any other
 * width are logged and skipped. A missing file is logged and otherwise ignored.
 */
private static void loadToolNamesForUserCategory() {
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);
    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length < 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            if (row.length != 1) {
                logger.warn("Line with row length " + row.length + " found. Is the formatting of toolNameForUserCategory.tsv correct?");
                return;
            }
            sourceCategoryUserToolName.add(row[0].toString());
        }
    };
    parserSettings.setProcessor(rowProcessor);
    TsvParser parser = new TsvParser(parserSettings);
    try {
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        parser.parse(new InputStreamReader(new FileInputStream("userAgentClassification/toolNameForUserCategory.tsv"), java.nio.charset.StandardCharsets.UTF_8));
    } catch (FileNotFoundException e) {
        // Name the file that is actually missing, not the standard-prefix file.
        logger.error("Could not open configuration file for tool names of the user category.", e);
    }
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 5 with TsvParser

use of com.univocity.parsers.tsv.TsvParser in project QueryAnalysis by Wikidata.

the class InputHandlerTSV method parseTo.

/**
 * Reads the TSV data given by {@code reader} and hands each row to the outputHandler.
 *
 * <p>For every row, column 0 is decoded into a query string and its validity, column 2 is
 * used as the user agent and column 3 as the time stamp. A missing or empty user agent or
 * time stamp is passed on as the string "null". After parsing, the output handler's files
 * and the {@code preprocessedWriter} (if set) are closed.
 *
 * @param outputHandler Handles the data that should be written.
 * @param day passed through unchanged to {@code outputHandler.writeLine} for every row
 */
public final void parseTo(final OutputHandler outputHandler, int day) {
    // read in queries from .tsv
    TsvParserSettings parserSettings = new TsvParserSettings();
    parserSettings.setLineSeparatorDetectionEnabled(true);
    parserSettings.setHeaderExtractionEnabled(true);
    parserSettings.setSkipEmptyLines(true);
    parserSettings.setReadInputOnSeparateThread(true);
    ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            if (row.length <= 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            // NOTE(review): row[0] is assumed to be non-null here - confirm against the input format.
            Tuple2<String, QueryHandler.Validity> queryTuple = decode(row[0].toString(), inputFile, parsingContext.currentLine());
            String queryString = queryTuple._1;
            QueryHandler.Validity validity = queryTuple._2;
            // Rows may have fewer than four columns; treat an absent column like an empty
            // one instead of reading past the end of the array.
            String userAgent = "null";
            if (row.length > 2 && row[2] != null) {
                userAgent = row[2].toString();
            }
            String timeStamp = "null";
            if (row.length > 3 && row[3] != null) {
                timeStamp = row[3].toString();
            }
            Long line = parsingContext.currentLine();
            try {
                outputHandler.writeLine(queryString, validity, userAgent, timeStamp, line, day, inputFile);
            } catch (NullPointerException e) {
                // Record the row as an internal error so it is not silently lost.
                outputHandler.writeLine("", QueryHandler.Validity.INTERNAL_ERROR, userAgent, timeStamp, line, day, inputFile);
                logger.error("Unexpected Null Pointer Exception in writeLine.", e);
            }
        }
    };
    parserSettings.setProcessor(rowProcessor);
    TsvParser parser = new TsvParser(parserSettings);
    parser.parse(reader);
    outputHandler.closeFiles();
    if (preprocessedWriter != null) {
        preprocessedWriter.close();
    }
}
Also used : QueryHandler(query.QueryHandler) ParsingContext(com.univocity.parsers.common.ParsingContext) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Aggregations

TsvParser (com.univocity.parsers.tsv.TsvParser): 8, TsvParserSettings (com.univocity.parsers.tsv.TsvParserSettings): 8, ParsingContext (com.univocity.parsers.common.ParsingContext): 5, ObjectRowProcessor (com.univocity.parsers.common.processor.ObjectRowProcessor): 5, Record (com.univocity.parsers.common.record.Record): 2, JSONElement (com.eden.common.json.JSONElement): 1, CsvParser (com.univocity.parsers.csv.CsvParser): 1, CsvParserSettings (com.univocity.parsers.csv.CsvParserSettings): 1, IOException (java.io.IOException): 1, InputStream (java.io.InputStream): 1, Path (java.nio.file.Path): 1, HashSet (java.util.HashSet): 1, Entry (java.util.Map.Entry): 1, JSONArray (org.json.JSONArray): 1, JSONObject (org.json.JSONObject): 1, ParsedQuery (org.openrdf.query.parser.ParsedQuery): 1, OpenRDFQueryHandler (query.OpenRDFQueryHandler): 1, QueryHandler (query.QueryHandler): 1