Search in sources :

Example 46 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForBioInfo method importStudy.

/**
 * Reads the reference, taxon and relations data files (in that order) and hands the
 * resulting reference map, taxon map and relations parser to {@code createRelations}.
 *
 * @throws StudyImporterException if an I/O error occurs while reading any of the data files;
 *                                the message names the file that was being read
 */
@Override
public void importStudy() throws StudyImporterException {
    // Tracks the resource currently being read so a failure reports the right file
    // (previously every I/O failure was blamed on the relations file).
    String currentResource = REFERENCE_DATA_FILE;
    try {
        Map<String, String> refMap = buildRefMap(parserFactory.createParser(currentResource, CharsetConstant.UTF8));
        currentResource = TAXON_DATA_FILE;
        Map<String, Taxon> taxonMap = buildTaxonMap(parserFactory.createParser(currentResource, CharsetConstant.UTF8));
        currentResource = RELATIONS_DATA_FILE;
        LabeledCSVParser relationsParser = parserFactory.createParser(currentResource, CharsetConstant.UTF8);
        createRelations(relationsParser, refMap, taxonMap);
    } catch (IOException e1) {
        throw new StudyImporterException("problem reading data file [" + currentResource + "]", e1);
    }
}
Also used : Taxon(org.eol.globi.domain.Taxon) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Example 47 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class ReferenceUtil method buildRefMap.

/**
 * Builds a sorted lookup from trimmed short reference (key column) to trimmed full
 * reference (value column) out of a delimited resource.
 * <p>
 * Rows with a blank key are skipped with a warning. Rows with a blank value fall back to
 * the short reference as the value. When a key occurs more than once, the first
 * occurrence wins and later ones are skipped with a warning.
 *
 * @param parserFactory   factory used to open the resource
 * @param referencePath   path of the resource to read
 * @param keyColumnName   label of the column holding the short reference
 * @param valueColumnName label of the column holding the full reference
 * @param delimiter       column delimiter of the resource
 * @return map from short reference to full reference
 * @throws StudyImporterException if the resource cannot be read or closed
 */
protected static Map<String, String> buildRefMap(ParserFactory parserFactory, String referencePath, String keyColumnName, String valueColumnName, char delimiter) throws StudyImporterException {
    Map<String, String> refMap = new TreeMap<String, String>();
    try {
        LabeledCSVParser referenceParser = parserFactory.createParser(referencePath, CharsetConstant.UTF8);
        try {
            referenceParser.changeDelimiter(delimiter);
            while (referenceParser.getLine() != null) {
                String shortReference = referenceParser.getValueByLabel(keyColumnName);
                if (StringUtils.isBlank(shortReference)) {
                    LOG.warn("missing short reference on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]");
                    continue;
                }
                String fullReference = referenceParser.getValueByLabel(valueColumnName);
                if (StringUtils.isBlank(fullReference)) {
                    LOG.warn("missing full reference for [" + shortReference + "] on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]");
                    fullReference = shortReference;
                }
                // Trim once; the trimmed form is what is stored and looked up.
                String key = StringUtils.trim(shortReference);
                if (StringUtils.isBlank(refMap.get(key))) {
                    refMap.put(key, StringUtils.trim(fullReference));
                } else {
                    LOG.warn("skipping [" + shortReference + "] on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]: key already defined.");
                }
            }
        } finally {
            // Release the underlying stream even when parsing fails part-way through.
            referenceParser.close();
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + referencePath + "]", e);
    }
    return refMap;
}
Also used : LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) TreeMap(java.util.TreeMap)

Example 48 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForSeltmann method importStudy.

/**
 * Imports interactions from the dataset's "archive" resource, a zip file expected to
 * contain {@code associatedTaxa.tsv} and {@code occurrences.tsv}.
 * <p>
 * The associated-taxa rows are first indexed by their {@code dwc:coreid} in an in-memory
 * MapDB map. Each occurrence row is then matched against that index via
 * {@code FIELD_IDIGBIO_RECORD_ID}; for every match with a non-blank source name,
 * non-blank target name and a resolvable interaction type an interaction is created.
 * Rows with missing names or unparsable dates are reported through the import logger
 * and do not abort the import.
 *
 * @throws StudyImporterException if no archive is configured, an expected archive entry
 *                                is missing, or reading the archive / creating a study fails
 */
@Override
public void importStudy() throws StudyImporterException {
    final String archiveURL = DatasetUtil.getNamedResourceURI(getDataset(), "archive");
    if (org.apache.commons.lang.StringUtils.isBlank(archiveURL)) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<String, Map<String, String>> assocMap = db.createHashMap("assocMap").make();

        // Step 1: extract the two expected entries from the archive into temp files.
        File assocTempFile = null;
        File occTempFile = null;
        ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))associatedTaxa.tsv$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))occurrences.tsv$")) {
                    occTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries that are not of interest
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            IOUtils.closeQuietly(zipInputStream);
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [associatedTaxa.tsv] resource");
        }
        if (occTempFile == null) {
            throw new StudyImporterException("failed to find expected [occurrences.tsv] resource");
        }

        // Step 2: index associated-taxa rows by core id.
        BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(new FileInputStream(assocTempFile), CharsetConstant.UTF8);
        try {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
            parser.changeDelimiter('\t');
            while (parser.getLine() != null) {
                Map<String, String> prop = new HashMap<String, String>();
                addKeyValue(parser, prop, "dwc:coreid");
                addKeyValue(parser, prop, "dwc:basisOfRecord");
                addKeyValue(parser, prop, FIELD_IDIGBIO_RECORD_ID);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_GENUS);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_SPECIFIC_EPITHET);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_SCIENTIFIC_NAME);
                // NOTE(review): "dwc:basisOfRecord" is added a second time here; looks like a
                // copy/paste leftover - confirm against addKeyValue before removing.
                addKeyValue(parser, prop, "dwc:basisOfRecord");
                addKeyValue(parser, prop, "aec:associatedRelationshipTerm");
                addKeyValue(parser, prop, "aec:associatedRelationshipURI");
                addKeyValue(parser, prop, "aec:associatedLocationOnHost");
                addKeyValue(parser, prop, "aec:associatedEmergenceVerbatimDate");
                String coreId = parser.getValueByLabel("dwc:coreid");
                if (StringUtils.isBlank(coreId)) {
                    // Parenthesised so the offset is added numerically instead of being
                    // appended as the character "1" by string concatenation.
                    LOG.warn("no coreid for line [" + (parser.getLastLineNumber() + 1) + "]");
                } else {
                    assocMap.put(coreId, prop);
                }
            }
        } finally {
            IOUtils.closeQuietly(assocReader);
        }

        // Step 3: walk the occurrences and create interactions for rows with an association.
        // The formatter does not depend on the row, so it is built once.
        final DateTimeFormatter eventDateFormat = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
        InputStream occStream = new FileInputStream(occTempFile);
        try {
            LabeledCSVParser occurrence = CSVTSVUtil.createLabeledCSVParser(occStream);
            occurrence.changeDelimiter('\t');
            while (occurrence.getLine() != null) {
                String references = occurrence.getValueByLabel("dcterms:references");
                Study study = nodeFactory.getOrCreateStudy(new StudyImpl("seltmann" + references, CitationUtil.sourceCitationLastAccessed(this.getDataset(), references), null, references));
                String recordId = occurrence.getValueByLabel(FIELD_IDIGBIO_RECORD_ID);
                Map<String, String> assoc = assocMap.get(recordId);
                if (assoc == null) {
                    continue;
                }
                String targetName = getTargetNameFromAssocMap(assoc);
                String sourceName = occurrence.getValueByLabel("scientificName");
                String eventDate = occurrence.getValueByLabel("eventDate");
                Date date = null;
                if (StringUtils.equals(eventDate, "0000-00-00") || StringUtils.isBlank(eventDate)) {
                    getLogger().warn(study, "found suspicious event date [" + eventDate + "]" + getLineMsg(occurrence));
                } else {
                    // Only the part before the first "/" is parsed. substringBefore
                    // (unlike split("/")[0]) cannot fail on a value such as "/".
                    String dateString = StringUtils.substringBefore(eventDate, "/");
                    try {
                        date = eventDateFormat.parseDateTime(dateString).toDate();
                    } catch (IllegalArgumentException e) {
                        getLogger().warn(study, "invalid date [" + dateString + "] " + getLineMsg(occurrence));
                    }
                }
                if (StringUtils.isBlank(sourceName)) {
                    getLogger().warn(study, "found blank source taxon name" + getLineMsg(occurrence));
                }
                if (StringUtils.isBlank(targetName)) {
                    getLogger().warn(study, "found blank associated target taxon name" + getLineMsg(occurrence));
                }
                InteractType interactType = parseInteractType(occurrence, assoc);
                if (interactType != null && StringUtils.isNotBlank(sourceName) && StringUtils.isNotBlank(targetName)) {
                    try {
                        createInteraction(occurrence, study, assoc, targetName, sourceName, date, interactType);
                    } catch (NodeFactoryException ex) {
                        // a single failing row is logged and skipped
                        String message = "failed to import interaction because of [" + ex.getMessage() + "]" + getLineMsg(occurrence);
                        LOG.warn(message);
                        getLogger().warn(study, message);
                    }
                }
            }
        } finally {
            IOUtils.closeQuietly(occStream);
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // Always release the in-memory store, including on failure paths.
        db.close();
    }
}
Also used : InteractType(org.eol.globi.domain.InteractType) Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Date(java.util.Date) ZipInputStream(java.util.zip.ZipInputStream) BufferedReader(java.io.BufferedReader) HashMap(java.util.HashMap) Map(java.util.Map) HTreeMap(org.mapdb.HTreeMap) File(java.io.File) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) DB(org.mapdb.DB) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Example 49 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForRoopnarine method importTrophicInteractions.

/**
 * Walks the rows of {@code studyResource} and, for every row that yields at least one
 * prey taxon, collects the predator specimens imported for that row.
 *
 * @param trophicGuildLookup             passed through to {@code importPredatorSpecimen}; also named in the I/O error message
 * @param trophicGuildNumberToSpeciesMap passed through to {@code importPreyList} and {@code importPredatorSpecimen}
 * @param studyResource                  resource whose rows are parsed
 * @param study                          study the imported data is associated with
 * @param location                       location passed to {@code importPredatorSpecimen}
 * @return all predator specimens collected across the rows
 * @throws StudyImporterException if reading fails or a row cannot be imported
 */
private List<Specimen> importTrophicInteractions(String trophicGuildLookup, Map<Integer, List<String>> trophicGuildNumberToSpeciesMap, String studyResource, Study study, Location location) throws StudyImporterException {
    final List<Specimen> collected = new ArrayList<Specimen>();
    try {
        final LabeledCSVParser rows = parserFactory.createParser(studyResource, CharsetConstant.UTF8);
        for (String[] row = rows.getLine(); row != null; row = rows.getLine()) {
            final List<String> preyNames = importPreyList(trophicGuildNumberToSpeciesMap, rows, study);
            if (preyNames.isEmpty()) {
                // nothing to link for this row
                continue;
            }
            collected.addAll(importPredatorSpecimen(trophicGuildLookup, trophicGuildNumberToSpeciesMap, rows, preyNames, study, location));
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to read trophic guild lookup [" + trophicGuildLookup + "]", e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to import trophic links [" + studyResource + "]", e);
    } catch (StudyImporterException e) {
        throw new StudyImporterException("failed to import trophic links from resource [" + studyResource + "]", e);
    }
    return collected;
}
Also used : Specimen(org.eol.globi.domain.Specimen) ArrayList(java.util.ArrayList) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Example 50 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForWrast method createDepthMap.

/**
 * Reads {@code LAVACA_BAY_ENVIRONMENTAL} and builds a lookup from depth id (derived from
 * season, region, site and habitat via {@code createDepthId}) to depth in the
 * "Depth (m)" column.
 * <p>
 * Rows whose depth is missing or not a number are reported as warnings and skipped.
 * A second row for an already registered depth id aborts the import.
 *
 * @param study study against which warnings are logged
 * @return map from depth id to depth
 * @throws StudyImporterException if the resource cannot be read or contains duplicate
 *                                season/region/site/habitat combinations
 */
private Map<String, Double> createDepthMap(Study study) throws StudyImporterException {
    Map<String, Double> depthMap = new HashMap<>();
    try {
        LabeledCSVParser depthParser = parserFactory.createParser(LAVACA_BAY_ENVIRONMENTAL, CharsetConstant.UTF8);
        while (depthParser.getLine() != null) {
            String seasonDepth = depthParser.getValueByLabel("Season");
            String regionDepth = depthParser.getValueByLabel("Upper/Lower");
            String siteDepth = depthParser.getValueByLabel("Site");
            String habitatDepth = depthParser.getValueByLabel("Habitat");
            String depthString = depthParser.getValueByLabel("Depth (m)");
            String depthId = createDepthId(seasonDepth, regionDepth, siteDepth, habitatDepth);
            if (depthMap.containsKey(depthId)) {
                // Report all four key components (the habitat was previously replaced
                // by a second copy of the season).
                throw new StudyImporterException(createMsgPrefix(depthParser) + " found duplicate entries for unique combination of season,region,site and habitat: [" + seasonDepth + ", " + regionDepth + ", " + siteDepth + ", " + habitatDepth + "]");
            }
            boolean parsed = false;
            // Double.parseDouble(null) throws NullPointerException rather than
            // NumberFormatException, so a missing value is checked explicitly.
            if (depthString != null) {
                try {
                    depthMap.put(depthId, Double.parseDouble(depthString));
                    parsed = true;
                } catch (NumberFormatException ignored) {
                    // reported below together with the missing-value case
                }
            }
            if (!parsed) {
                getLogger().warn(study, createMsgPrefix(depthParser) + "failed to parse depth for depthId [" + depthId + "], skipping entry");
            }
        }
    } catch (IOException e1) {
        // keep the cause so the underlying I/O problem stays visible
        throw new StudyImporterException("failed to read from [" + LAVACA_BAY_ENVIRONMENTAL + "]", e1);
    }
    return depthMap;
}
Also used : LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser): 82; IOException (java.io.IOException): 40; Test (org.junit.Test): 31; Study (org.eol.globi.domain.Study): 24; StudyImpl (org.eol.globi.domain.StudyImpl): 17; Specimen (org.eol.globi.domain.Specimen): 15; HashMap (java.util.HashMap): 13; ArrayList (java.util.ArrayList): 12; Location (org.eol.globi.domain.Location): 12; TaxonImpl (org.eol.globi.domain.TaxonImpl): 12; CSVParser (com.Ostermiller.util.CSVParser): 10; StringReader (java.io.StringReader): 8; LocationImpl (org.eol.globi.domain.LocationImpl): 8; Taxon (org.eol.globi.domain.Taxon): 8; InteractType (org.eol.globi.domain.InteractType): 7; File (java.io.File): 6; FileInputStream (java.io.FileInputStream): 6; InputStream (java.io.InputStream): 6; Date (java.util.Date): 6; List (java.util.List): 6