Search in sources :

Example 6 with TsvParserSettings

use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.

the class Main method loadPreBuildQueryTypes.

/**
 * Loads all pre-build query types, and any tool mapping tables, found in the
 * {@code preBuildQueryTypeFiles} directory.
 *
 * <p>Each regular file ending in {@code .preBuildQueryType} is read as a query string and
 * handed to an {@link OpenRDFQueryHandler}. Queries that are not reported as valid are
 * logged and skipped. For valid queries the MD5 digest of the normalized tuple expression
 * dump is stored in one of the {@code queryTypes} maps under the file's base name.
 *
 * <p>Each regular file ending in {@code .tsv} is parsed as a tab-separated table; rows
 * with exactly five columns are added to {@code queryTypeToToolMapping}.
 *
 * <p>An {@link IOException} while listing the directory or reading a file is logged and
 * ends the loading; it is not propagated to the caller.
 */
public static void loadPreBuildQueryTypes() {
    final String directoryName = "preBuildQueryTypeFiles";
    try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(Paths.get(directoryName))) {
        for (Path filePath : directoryStream) {
            if (!Files.isRegularFile(filePath)) {
                continue;
            }
            // Work on the last path element only, so the result does not depend on the
            // platform's directory separator.
            String fileName = filePath.getFileName().toString();
            if (fileName.endsWith(".preBuildQueryType")) {
                // Decode explicitly as UTF-8 instead of relying on the platform default charset.
                String queryString = new String(readAllBytes(filePath), java.nio.charset.StandardCharsets.UTF_8);
                OpenRDFQueryHandler queryHandler = new OpenRDFQueryHandler(QueryHandler.Validity.DEFAULT, -1L, -1, queryString, "preBuildQueryTypes", "", -1);
                if (queryHandler.getValidityStatus() != QueryHandler.Validity.VALID) {
                    logger.info("The Pre-build query " + filePath + " is no valid SPARQL");
                    continue;
                }
                ParsedQuery normalizedPreBuildQuery = queryHandler.getNormalizedQuery();
                // The query type name is the file name without its extension.
                String queryTypeName = fileName.substring(0, fileName.lastIndexOf('.'));
                if (normalizedPreBuildQuery != null) {
                    String queryDump = normalizedPreBuildQuery.getTupleExpr().toString();
                    byte[] md5 = DigestUtils.md5(queryDump);
                    // floorMod keeps the bucket index non-negative for negative hash codes.
                    int index = Math.floorMod(queryDump.hashCode(), numberOfQueryTypeDiskMaps);
                    if (queryTypes[index].containsKey(md5)) {
                        String existingName = queryTypes[index].get(md5);
                        if (!existingName.equals(queryTypeName)) {
                            logger.info(existingName + " is duplicate of " + queryTypeName);
                        }
                    }
                    // A later file with the same digest replaces the earlier name.
                    queryTypes[index].put(md5, queryTypeName);
                } else {
                    logger.info("Pre-build query " + queryTypeName + " could not be parsed.");
                }
            }
            if (fileName.endsWith(".tsv")) {
                TsvParserSettings parserSettings = new TsvParserSettings();
                parserSettings.setLineSeparatorDetectionEnabled(true);
                parserSettings.setHeaderExtractionEnabled(true);
                parserSettings.setSkipEmptyLines(true);
                parserSettings.setReadInputOnSeparateThread(true);
                ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

                    @Override
                    public void rowProcessed(Object[] row, ParsingContext parsingContext) {
                        if (row.length <= 1) {
                            logger.warn("Ignoring line without tab while parsing.");
                            return;
                        }
                        if (row.length == 5) {
                            // NOTE(review): five columns are required but only the first four
                            // are read; confirm the fifth is intentionally ignored.
                            queryTypeToToolMapping.put(new Tuple2<>(row[0].toString(), row[1].toString()), new Tuple2<>(row[2].toString(), row[3].toString()));
                            return;
                        }
                        logger.warn("Line with row length " + row.length + " found. Is the formatting of toolMapping.tsv correct?");
                    }
                };
                parserSettings.setProcessor(rowProcessor);
                TsvParser parser = new TsvParser(parserSettings);
                parser.parse(filePath.toFile());
            }
        }
    } catch (IOException e) {
        // Report the directory that was actually read.
        logger.error("Could not read from directory " + directoryName, e);
    }
}
Also used : Path(java.nio.file.Path) ParsingContext(com.univocity.parsers.common.ParsingContext) ParsedQuery(org.openrdf.query.parser.ParsedQuery) OpenRDFQueryHandler(query.OpenRDFQueryHandler) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 7 with TsvParserSettings

use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.

the class Anonymizer method loadAllowedUserAgents.

/**
 * Loads the allowed user agent mapping from the tab-separated file
 * {@code anonymization/allowedUserAgents}.
 *
 * <p>For every row with at least two columns, the second column is stored in
 * {@code allowedUserAgents} as the key and the first column as the value. Rows with
 * fewer columns are logged and ignored.
 *
 * <p>If the file cannot be opened or closed, the error is logged and the method returns
 * without propagating the exception.
 */
public static void loadAllowedUserAgents() {
    // try-with-resources guarantees the reader is closed even if parser setup fails;
    // the charset is explicit so decoding does not depend on the platform default.
    try (InputStreamReader reader = new InputStreamReader(new FileInputStream("anonymization/allowedUserAgents"), java.nio.charset.StandardCharsets.UTF_8)) {
        TsvParserSettings parserSettings = new TsvParserSettings();
        parserSettings.setLineSeparatorDetectionEnabled(true);
        parserSettings.setHeaderExtractionEnabled(true);
        parserSettings.setSkipEmptyLines(true);
        parserSettings.setReadInputOnSeparateThread(true);
        ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {

            @Override
            public void rowProcessed(Object[] row, ParsingContext parsingContext) {
                if (row.length <= 1) {
                    logger.warn("Ignoring line without tab while parsing.");
                    return;
                }
                allowedUserAgents.put(row[1].toString(), row[0].toString());
            }
        };
        parserSettings.setProcessor(rowProcessor);
        TsvParser parser = new TsvParser(parserSettings);
        parser.parse(reader);
    } catch (java.io.IOException e) {
        // Covers FileNotFoundException from opening the file as well as close failures.
        logger.error("Could not read the allowed user agent string mapping.", e);
    }
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 8 with TsvParserSettings

use of com.univocity.parsers.tsv.TsvParserSettings in project eol-globi-data by jhpoelen.

the class DatasetImporterForHurlbert method importStudy.

/**
 * Reads the tab-separated {@code RESOURCE} of the current dataset record by record and
 * imports every record that carries a non-blank value in its "Source" column.
 *
 * @throws StudyImporterException if the resource cannot be accessed
 */
@Override
public void importStudy() throws StudyImporterException {
    try (InputStream resource = getDataset().retrieve(RESOURCE)) {
        setCurrentResource(RESOURCE);

        // Accumulators passed to importRecords for every accepted record.
        final Set<String> regions = new HashSet<>();
        final Set<String> locales = new HashSet<>();
        final Set<String> habitats = new HashSet<>();
        final String columnNameSource = "Source";

        final TsvParserSettings settings = new TsvParserSettings();
        settings.getFormat().setLineSeparator("\n");
        settings.setHeaderExtractionEnabled(true);

        final TsvParser parser = new TsvParser(settings);
        parser.beginParsing(resource, CharsetConstant.UTF8);

        for (Record record = parser.parseNextRecord(); record != null; record = parser.parseNextRecord()) {
            setCurrentLine(parser.getContext().currentLine());
            final String sourceCitation = columnValueOrNull(record, columnNameSource);
            if (!StringUtils.isBlank(sourceCitation)) {
                importRecords(regions, locales, habitats, record, sourceCitation);
            } else {
                // Records without a source citation are reported and skipped.
                LOG.warn(createMsg("failed to extract source from column [" + columnNameSource + "] in [" + RESOURCE + "]"));
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to access [" + RESOURCE + "]", e);
    }
}
Also used : InputStream(java.io.InputStream) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) Record(com.univocity.parsers.common.record.Record) IOException(java.io.IOException) TsvParser(com.univocity.parsers.tsv.TsvParser) HashSet(java.util.HashSet)

Example 9 with TsvParserSettings

use of com.univocity.parsers.tsv.TsvParserSettings in project eol-globi-data by jhpoelen.

the class DatasetImporterForFishbase3 method handleTsvInputStream.

/**
 * Parses a tab-separated stream and passes every parsed record to the listener.
 * The stream is closed when this method returns.
 *
 * @param listener receives each parsed record in turn
 * @param is       the tab-separated input
 * @throws StudyImporterException if an {@link IOException} occurs while handling the stream
 */
public static void handleTsvInputStream(RecordListener listener, InputStream is) throws StudyImporterException {
    try (InputStream inputStream = is) {
        final TsvParserSettings tsvSettings = new TsvParserSettings();
        tsvSettings.getFormat().setLineSeparator("\n");
        tsvSettings.setMaxCharsPerColumn(4096 * 8);
        tsvSettings.setHeaderExtractionEnabled(true);

        final TsvParser tsvParser = new TsvParser(tsvSettings);
        tsvParser.beginParsing(inputStream, CharsetConstant.UTF8);

        // parseNextRecord returns null once the input is exhausted.
        for (Record current = tsvParser.parseNextRecord(); current != null; current = tsvParser.parseNextRecord()) {
            listener.onRecord(current);
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to import tsv stream", e);
    }
}
Also used : InputStream(java.io.InputStream) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) Record(com.univocity.parsers.common.record.Record) IOException(java.io.IOException) TsvParser(com.univocity.parsers.tsv.TsvParser)

Example 10 with TsvParserSettings

use of com.univocity.parsers.tsv.TsvParserSettings in project QueryAnalysis by Wikidata.

the class Main method loadPropertyGroupMapping.

/**
 * Reads {@code propertyClassification/propertyGroupMapping.tsv} and fills
 * {@code propertyGroupMapping}: the first column becomes the key and the comma-separated
 * entries of the second column become the associated set of groups.
 */
private static void loadPropertyGroupMapping() {
    ObjectRowProcessor mappingProcessor = new ObjectRowProcessor() {

        @Override
        public void rowProcessed(Object[] row, ParsingContext parsingContext) {
            final int columnCount = row.length;
            if (columnCount <= 1) {
                logger.warn("Ignoring line without tab while parsing.");
                return;
            }
            if (columnCount != 2) {
                logger.warn("Line with row length " + row.length + " found. Is the formatting of propertyGroupMapping.tsv correct?");
                return;
            }
            // A row whose second column is null contributes no mapping.
            if (row[1] == null) {
                return;
            }
            String property = row[0].toString();
            String[] groupNames = row[1].toString().split(",");
            propertyGroupMapping.put(property, new HashSet<String>(Arrays.asList(groupNames)));
        }
    };

    TsvParserSettings settings = new TsvParserSettings();
    settings.setLineSeparatorDetectionEnabled(true);
    settings.setHeaderExtractionEnabled(true);
    settings.setSkipEmptyLines(true);
    settings.setReadInputOnSeparateThread(true);
    settings.setProcessor(mappingProcessor);

    File mappingFile = new File("propertyClassification/propertyGroupMapping.tsv");
    new TsvParser(settings).parse(mappingFile);
}
Also used : ParsingContext(com.univocity.parsers.common.ParsingContext) TsvParserSettings(com.univocity.parsers.tsv.TsvParserSettings) ObjectRowProcessor(com.univocity.parsers.common.processor.ObjectRowProcessor) TsvParser(com.univocity.parsers.tsv.TsvParser)

Aggregations

TsvParser (com.univocity.parsers.tsv.TsvParser): 10, TsvParserSettings (com.univocity.parsers.tsv.TsvParserSettings): 10, ParsingContext (com.univocity.parsers.common.ParsingContext): 5, ObjectRowProcessor (com.univocity.parsers.common.processor.ObjectRowProcessor): 5, Record (com.univocity.parsers.common.record.Record): 4, IOException (java.io.IOException): 3, InputStream (java.io.InputStream): 3, HashSet (java.util.HashSet): 2, JSONElement (com.eden.common.json.JSONElement): 1, CsvParser (com.univocity.parsers.csv.CsvParser): 1, CsvParserSettings (com.univocity.parsers.csv.CsvParserSettings): 1, Path (java.nio.file.Path): 1, Entry (java.util.Map.Entry): 1, JSONArray (org.json.JSONArray): 1, JSONObject (org.json.JSONObject): 1, ParsedQuery (org.openrdf.query.parser.ParsedQuery): 1, OpenRDFQueryHandler (query.OpenRDFQueryHandler): 1