Search in sources :

Example 56 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class TaxonCacheService method initTaxonCache.

/**
 * Populates {@code resolvedIdToTaxonMap} from the "taxonCache" database.
 *
 * If the database already contains a map named "taxonCacheById", that map is
 * re-used as-is. Otherwise a new tree map is built by pumping the entries
 * produced by {@code taxonCacheIterator(taxonCacheResource, ...)} into it, and
 * the elapsed time and resulting size are handed to {@code logCacheLoadStats}.
 *
 * @throws PropertyEnricherException if building the cache fails with an {@link IOException}
 */
private void initTaxonCache() throws PropertyEnricherException {
    final DB cacheDb = initDb("taxonCache");
    final String cacheName = "taxonCacheById";

    if (cacheDb.exists(cacheName)) {
        LOG.info("re-using pre-existing cache");
        resolvedIdToTaxonMap = cacheDb.getTreeMap(cacheName);
        return;
    }

    LOG.info("no pre-existing cache found, rebuilding...");
    LOG.info("taxon cache loading [" + taxonCacheResource + "]...");

    // Flags a line for skipping when the taxon parsed from it has a blank path.
    final LineSkipper blankPathSkipper = new LineSkipper() {

        @Override
        public boolean shouldSkipLine(LabeledCSVParser parser) {
            final Taxon parsed = TaxonCacheParser.parseLine(parser);
            return StringUtils.isBlank(parsed.getPath());
        }
    };

    final StopWatch timer = new StopWatch();
    timer.start();
    try {
        // The map is created on the db before the source iterator is opened;
        // keep this call order so a failing iterator leaves the same state behind.
        resolvedIdToTaxonMap = cacheDb.createTreeMap(cacheName)
                .pumpPresort(100000)
                .pumpIgnoreDuplicates()
                .pumpSource(taxonCacheIterator(taxonCacheResource, blankPathSkipper))
                .keySerializer(BTreeKeySerializer.STRING)
                .make();
    } catch (IOException e) {
        throw new PropertyEnricherException("failed to instantiate taxonCache: [" + e.getMessage() + "]", e);
    }
    timer.stop();

    LOG.info("taxon cache loading [" + taxonCacheResource + "] done.");
    logCacheLoadStats(timer.getTime(), resolvedIdToTaxonMap.size());
    timer.reset();
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) Taxon(org.eol.globi.domain.Taxon) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) DB(org.mapdb.DB) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 57 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForCruaud method importStudy.

/**
 * Imports parasite/host interactions from the CSV resource at {@code RESOURCE_PATH}.
 *
 * For every line accepted by {@code importFilter}, the parasite name
 * ("Family and Species") and host name ("Natural host Ficus species", with
 * "F." expanded to "Ficus") are read. When {@code areNamesAvailable} accepts
 * the pair, a specimen is created for each and linked with
 * {@code InteractType.PARASITE_OF}. If the geo names service resolves the
 * "Sampling location" to a point, both specimens are recorded as caught there;
 * otherwise a warning is logged.
 *
 * @throws StudyImporterException if the resource cannot be opened or read, or a line fails to import
 */
@Override
public void importStudy() throws StudyImporterException {
    final LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(RESOURCE_PATH, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + RESOURCE_PATH + "]", e);
    }

    try {
        final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("cruaud", SOURCE, "http://dx.doi.org/10.1093/sysbio/sys068", null));
        while (parser.getLine() != null) {
            if (!importFilter.shouldImportRecord((long) parser.getLastLineNumber())) {
                continue;
            }
            try {
                final String parasiteName = StringUtils.trim(parser.getValueByLabel("Family and Species"));
                final String rawHostName = StringUtils.trim(parser.getValueByLabel("Natural host Ficus species"));
                final String hostName = StringUtils.replace(rawHostName, "F.", "Ficus");
                if (!areNamesAvailable(parasiteName, hostName)) {
                    continue;
                }

                final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(parasiteName, null));
                final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                parasite.interactsWith(host, InteractType.PARASITE_OF);

                final String locality = StringUtils.trim(parser.getValueByLabel("Sampling location"));
                // Only look up coordinates when the service knows the locality;
                // an unknown locality and a null lookup result are reported the same way.
                final LatLng point = getGeoNamesService().hasTermForLocale(locality)
                        ? getGeoNamesService().findLatLng(locality)
                        : null;
                if (point == null) {
                    LOG.warn("no location associated with locality [" + locality + "]");
                } else {
                    final Location caughtAt = nodeFactory.getOrCreateLocation(new LocationImpl(point.getLat(), point.getLng(), null, null));
                    parasite.caughtIn(caughtAt);
                    host.caughtIn(caughtAt);
                }
            } catch (NodeFactoryException | NumberFormatException e) {
                throw new StudyImporterException("failed to import line [" + (parser.lastLineNumber() + 1) + "]", e);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem importing [" + RESOURCE_PATH + "]", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) Specimen(org.eol.globi.domain.Specimen) LocationImpl(org.eol.globi.domain.LocationImpl) LatLng(org.eol.globi.geo.LatLng) Location(org.eol.globi.domain.Location)

Example 58 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForDunne method importStudy.

/**
 * Imports a study from a nodes resource and a links resource.
 *
 * First pass: every nodes row for which {@code getNodeId} yields a non-null id
 * is mapped to a taxon built from its "Name" column and an ITIS-prefixed "TSN"
 * column. Second pass: for every links row, when {@code getLocation()} is
 * non-null and a location node can be obtained for its coordinates,
 * {@code addLink} is invoked with that location. Rows are not linked when no
 * location is available.
 *
 * @throws StudyImporterException if a resource cannot be read or node creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    final Study study = createStudy();
    try {
        final LabeledCSVParser nodeParser = parserFactory.createParser(getNodesResourceName(), CharsetConstant.UTF8);
        nodeParser.changeDelimiter(getDelimiter());
        final Map<Integer, Taxon> taxonByNodeId = new HashMap<>();
        while (nodeParser.getLine() != null) {
            final Integer nodeId = getNodeId(nodeParser);
            if (nodeId == null) {
                continue;
            }
            final String tsn = nodeParser.getValueByLabel("TSN");
            final String name = nodeParser.getValueByLabel("Name");
            taxonByNodeId.put(nodeId, new TaxonImpl(name, TaxonomyProvider.ID_PREFIX_ITIS + tsn));
        }

        final LabeledCSVParser linkParser = parserFactory.createParser(getLinksResourceName(), CharsetConstant.UTF8);
        linkParser.changeDelimiter(getDelimiter());
        while (linkParser.getLine() != null) {
            if (getLocation() == null) {
                continue;
            }
            // The location node is looked up for every link row, as before.
            final Location linkLocation = nodeFactory.getOrCreateLocation(new LocationImpl(getLocation().getLat(), getLocation().getLng(), null, null));
            if (linkLocation != null) {
                addLink(study, taxonByNodeId, linkParser, linkLocation);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to find data file(s)", e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create nodes", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) Taxon(org.eol.globi.domain.Taxon) TaxonImpl(org.eol.globi.domain.TaxonImpl) ArrayList(java.util.ArrayList) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location)

Example 59 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForGlobalWebDb method parseDietMatrix.

/**
 * Parses a diet matrix preceded by a citation row and reports each positive
 * cell as an "ate" interaction to the given listener.
 *
 * The input is split on CRLF. The first row is taken as the citation (a
 * leading quote and a trailing quote followed by any commas are stripped); the
 * remaining rows are joined with "\n" and parsed as labeled CSV. The first
 * header cell is split on "-": the first piece is the habitat and the
 * remaining pieces, trimmed and joined with ", ", form the locality. Every
 * other header cell is a source taxon. For each data row, the first column is
 * the target taxon, and a link is emitted for every source taxon column whose
 * value consists of digits only and is greater than zero.
 *
 * Nothing is emitted when the input has no rows, when the matrix has no header
 * row, or when the header has fewer than two columns.
 *
 * @param listener               receives one property map per interaction via {@code newLink}
 * @param dietMatrixWithCitation citation row followed by the CSV diet matrix, CRLF-separated
 * @param sourceCitation         stored under {@code STUDY_SOURCE_CITATION} on every link
 * @throws IOException            if reading the matrix fails
 * @throws StudyImporterException declared for failures while reporting a link
 */
static void parseDietMatrix(InteractionListener listener, String dietMatrixWithCitation, String sourceCitation) throws IOException, StudyImporterException {
    String[] rows = dietMatrixWithCitation.split("\r\n");
    // split() drops trailing empty strings, so an input consisting only of line breaks yields no rows.
    if (rows.length == 0) {
        return;
    }

    String citation = rows[0].replaceAll("^\"", "").replaceAll("\",*$", "");
    List<String> matrixRows = Arrays.asList(rows).subList(1, rows.length);
    String matrix = org.apache.commons.lang.StringUtils.join(matrixRows, "\n");
    // NOTE(review): toInputStream(String) encodes with the platform default charset — confirm
    // it matches the charset CSVTSVUtil.createLabeledCSVParser uses to decode the stream.
    LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(IOUtils.toInputStream(matrix));

    String[] headerColumns = parser.getLabels();
    // getLabels() yields null when the matrix part is empty (citation row only);
    // treat that like a header that is too short instead of failing with an NPE.
    if (headerColumns == null || headerColumns.length <= 1) {
        return;
    }

    String[] habitatAndLocality = headerColumns[0].split("-");
    String habitat = habitatAndLocality[0];
    String locality = Arrays.asList(habitatAndLocality)
            .subList(1, habitatAndLocality.length)
            .stream()
            .map(String::trim)
            .collect(Collectors.joining(", "));

    // Properties shared by every link of this matrix. Plain maps are used rather than
    // double-brace initialization, which would create an anonymous TreeMap subclass per link.
    Map<String, String> sharedProps = new TreeMap<>();
    sharedProps.put(StudyImporterForTSV.HABITAT_NAME, org.apache.commons.lang.StringUtils.trim(habitat));
    sharedProps.put(StudyImporterForTSV.LOCALITY_NAME, org.apache.commons.lang.StringUtils.trim(locality));
    sharedProps.put(StudyImporterForTSV.INTERACTION_TYPE_NAME, InteractType.ATE.getLabel());
    sharedProps.put(StudyImporterForTSV.INTERACTION_TYPE_ID, InteractType.ATE.getIRI());
    sharedProps.put(StudyImporterForTSV.REFERENCE_ID, MD5.getHashString(citation));
    sharedProps.put(StudyImporterForTSV.REFERENCE_CITATION, citation);
    sharedProps.put(StudyImporterForTSV.STUDY_SOURCE_CITATION, sourceCitation);

    List<String> sourceTaxa = Arrays.asList(headerColumns).subList(1, headerColumns.length);
    while (parser.getLine() != null) {
        // The target taxon is the same for every column of the row, so read it once.
        String targetTaxon = org.apache.commons.lang.StringUtils.trim(parser.getValueByLabel(headerColumns[0]));
        for (String sourceTaxon : sourceTaxa) {
            String value = parser.getValueByLabel(sourceTaxon);
            if (NumberUtils.isDigits(value) && Integer.parseInt(value) > 0) {
                Map<String, String> link = new TreeMap<>(sharedProps);
                link.put(StudyImporterForTSV.SOURCE_TAXON_NAME, org.apache.commons.lang.StringUtils.trim(sourceTaxon));
                link.put(StudyImporterForTSV.TARGET_TAXON_NAME, targetTaxon);
                listener.newLink(link);
            }
        }
    }
}
Also used : LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) TreeMap(java.util.TreeMap)

Example 60 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForGoMexSI2 method addReferences.

/**
 * Adds a study for every reference id in the references resource that is not
 * yet present in the given map.
 *
 * The resource is read twice: once by {@code collectContributors} to build a
 * reference-id-to-contributor map, and once more, with a freshly created
 * parser, to walk the rows. For each row whose mandatory "DATA_ID" has no
 * (non-null) study in {@code referenceIdToStudy}, {@code addNewStudy} is
 * called with the row and the contributor collected for that id.
 *
 * @param referenceIdToStudy studies known so far, keyed by reference id; passed on to {@code addNewStudy}
 * @throws StudyImporterException if the references resource cannot be opened or read
 */
protected void addReferences(Map<String, Study> referenceIdToStudy) throws StudyImporterException {
    final String resourcePath = getReferencesResourcePath();
    try {
        // First pass: gather contributors per reference id.
        final Map<String, String> contributorByRefId =
                collectContributors(resourcePath, parserFactory.createParser(resourcePath, CharsetConstant.UTF8));

        // Second pass: a new parser, since the first one was handed to collectContributors.
        final LabeledCSVParser referenceRows = parserFactory.createParser(resourcePath, CharsetConstant.UTF8);
        while (referenceRows.getLine() != null) {
            final String refId = getMandatoryValue(resourcePath, referenceRows, "DATA_ID");
            if (referenceIdToStudy.get(refId) == null) {
                addNewStudy(referenceIdToStudy, resourcePath, referenceRows, refId, contributorByRefId.get(refId));
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to open resource [" + resourcePath + "]", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser)82 IOException (java.io.IOException)40 Test (org.junit.Test)31 Study (org.eol.globi.domain.Study)24 StudyImpl (org.eol.globi.domain.StudyImpl)17 Specimen (org.eol.globi.domain.Specimen)15 HashMap (java.util.HashMap)13 ArrayList (java.util.ArrayList)12 Location (org.eol.globi.domain.Location)12 TaxonImpl (org.eol.globi.domain.TaxonImpl)12 CSVParser (com.Ostermiller.util.CSVParser)10 StringReader (java.io.StringReader)8 LocationImpl (org.eol.globi.domain.LocationImpl)8 Taxon (org.eol.globi.domain.Taxon)8 InteractType (org.eol.globi.domain.InteractType)7 File (java.io.File)6 FileInputStream (java.io.FileInputStream)6 InputStream (java.io.InputStream)6 Date (java.util.Date)6 List (java.util.List)6