Search in sources :

Example 1 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForPlanque method importStudy.

/**
 * Imports the study from the tab-separated links resource.
 *
 * <p>Before the links are read, citations are resolved in two steps: the resource returned by
 * {@code getReferencesForLinks()} maps each PWKEY to one or more AUTHOR_YEAR values, and the
 * resource returned by {@code getReferences()} maps each AUTHOR_YEAR to its FULL_REFERENCE.
 * Every line of the links resource accepted by {@code importFilter} is then passed to
 * {@code importLine} together with the resolved PWKEY-to-references map.
 *
 * @throws StudyImporterException if a resource cannot be read, or if an AUTHOR_YEAR that is
 *                                referenced by a PWKEY has no (non-blank) full reference
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser dataParser;
    try {
        dataParser = parserFactory.createParser(getLinks(), CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + getLinks() + "]", e);
    }
    dataParser.changeDelimiter('\t');

    Map<String, String> authorYearToFullReference = ReferenceUtil.buildRefMap(parserFactory, getReferences(), "AUTHOR_YEAR", "FULL_REFERENCE", '\t');

    // Step 1: collect all AUTHOR_YEAR values per PWKEY; rows with a blank key or value are skipped.
    Map<String, List<String>> pairwiseKeyToAuthorYears = new TreeMap<String, List<String>>();
    try {
        LabeledCSVParser referenceParser = parserFactory.createParser(getReferencesForLinks(), CharsetConstant.UTF8);
        referenceParser.changeDelimiter('\t');
        while (referenceParser.getLine() != null) {
            String pairwiseKey = referenceParser.getValueByLabel("PWKEY");
            String authorYear = referenceParser.getValueByLabel("AUTHOR_YEAR");
            if (StringUtils.isNotBlank(pairwiseKey) && StringUtils.isNotBlank(authorYear)) {
                List<String> authorYears = pairwiseKeyToAuthorYears.get(pairwiseKey);
                if (authorYears == null) {
                    // first AUTHOR_YEAR seen for this key: register the list once
                    authorYears = new ArrayList<String>();
                    pairwiseKeyToAuthorYears.put(pairwiseKey, authorYears);
                }
                authorYears.add(authorYear);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to import [" + getReferencesForLinks() + "]", e);
    }

    // Step 2: translate each AUTHOR_YEAR into its full reference; a missing reference is fatal.
    Map<String, List<String>> pairwiseKeyToFullCitation = new TreeMap<String, List<String>>();
    for (Map.Entry<String, List<String>> entry : pairwiseKeyToAuthorYears.entrySet()) {
        String pairwiseKey = entry.getKey();
        List<String> authorYearList = entry.getValue();
        if (CollectionUtils.isEmpty(authorYearList)) {
            throw new StudyImporterException("found no AUTHOR_YEAR for PWKEY: [" + pairwiseKey + "]");
        }
        List<String> references = new ArrayList<String>();
        for (String authorYear : authorYearList) {
            String reference = authorYearToFullReference.get(authorYear);
            if (StringUtils.isBlank(reference)) {
                // report the AUTHOR_YEAR that failed to resolve, not the PWKEY a second time
                throw new StudyImporterException("found no FULL_CITATION for PWKEY: [" + pairwiseKey + "] and AUTHOR_YEAR [" + authorYear + "]");
            }
            references.add(reference);
        }
        pairwiseKeyToFullCitation.put(pairwiseKey, references);
    }

    // Step 3: import every data line that the import filter accepts.
    try {
        while (dataParser.getLine() != null) {
            if (importFilter.shouldImportRecord((long) dataParser.getLastLineNumber())) {
                importLine(dataParser, pairwiseKeyToFullCitation);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem importing study at line [" + dataParser.lastLineNumber() + "]", e);
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) TreeMap(java.util.TreeMap)

Example 2 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForRobledo method importStudy.

/**
 * Imports the plant-herbivore interactions from {@code robledo/table_s1_extract.csv}.
 *
 * <p>Each row yields one herbivore specimen, named via {@code completeBeetleName} from the
 * "Herbivore species" column. For every plant abbreviation returned by
 * {@code buildPlantLookup()}, the column of that name is parsed as an integer; a value
 * greater than zero records that the herbivore ate a specimen of that plant. All specimens
 * are placed at the single fixed location created below.
 *
 * @throws StudyImporterException if the location or a specimen cannot be created, or the
 *                                resource cannot be read
 */
@Override
public void importStudy() throws StudyImporterException {
    final String citation = "García-Robledo C, Erickson DL, Staines CL, Erwin TL, Kress WJ. Tropical Plant–Herbivore Networks: Reconstructing Species Interactions Using DNA Barcodes Heil M, editor. PLoS ONE [Internet]. 2013 January 8;8(1):e52967. Available from: http://dx.doi.org/10.1371/journal.pone.0052967";
    final String studyDoi = "http://dx.doi.org/10.1371/journal.pone.0052967";
    final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("García-Robledo et al 2013", citation, studyDoi, citation));
    final Map<String, String> plantNameByAbbreviation = buildPlantLookup();

    // spatial location from: http://www.ots.ac.cr/index.php?option=com_content&task=view&id=163&Itemid=348
    final Double lat = LocationUtil.parseDegrees("10°26'N");
    final Double lng = LocationUtil.parseDegrees("83°59'W");
    final Location site;
    try {
        site = nodeFactory.getOrCreateLocation(new LocationImpl(lat, lng, 35.0, null));
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create location", e);
    }

    // TODO: need to map date range of collections
    final String studyResource = "robledo/table_s1_extract.csv";
    try {
        final LabeledCSVParser table = parserFactory.createParser(studyResource, CharsetConstant.UTF8);
        while (table.getLine() != null) {
            final String herbivoreName = completeBeetleName(table.getValueByLabel("Herbivore species"));
            final Specimen herbivore = nodeFactory.createSpecimen(study, new TaxonImpl(herbivoreName, null));
            herbivore.caughtIn(site);

            for (Map.Entry<String, String> plantEntry : plantNameByAbbreviation.entrySet()) {
                final String abbreviation = plantEntry.getKey();
                final String plantName = plantEntry.getValue();
                final String cell = table.getValueByLabel(abbreviation);
                try {
                    // a positive code marks an observed interaction; zero or negative is ignored
                    if (Integer.parseInt(cell) > 0) {
                        final Specimen plant = nodeFactory.createSpecimen(study, new TaxonImpl(plantName, null));
                        plant.caughtIn(site);
                        herbivore.ate(plant);
                    }
                } catch (NumberFormatException ex) {
                    // unparsable or absent cell: log and carry on with the next plant
                    final String where = studyResource + ":" + table.lastLineNumber();
                    getLogger().warn(study, "malformed or no value [" + cell + "] found for [" + plantName + "(" + abbreviation + ")] and beetle [" + herbivoreName + "] could be found in [" + where + "]");
                }
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem reading [" + studyResource + "]", e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("cannot create specimens from [" + studyResource + "]", e);
    }
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) Specimen(org.eol.globi.domain.Specimen) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location)

Example 3 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForTSV method importRepository.

/**
 * Reads the dataset's tab-separated {@code /interactions.tsv} resource and reports every row
 * as a link (a map of property name to value) to a newly created {@code InteractionListenerImpl}.
 *
 * <p>Values are copied from the column named after each property constant. Before a link is
 * reported, {@code attemptToGenerateReferencePropertiesIfMissing} is given the chance to
 * complete its reference properties.
 *
 * @param namespace      passed through to {@code attemptToGenerateReferencePropertiesIfMissing}
 * @param sourceCitation citation of the data source; {@code null} is treated as no citation
 * @throws IOException            if the resource cannot be opened or read
 * @throws StudyImporterException if a link cannot be processed
 */
private void importRepository(String namespace, String sourceCitation) throws IOException, StudyImporterException {
    InteractionListenerImpl interactionListenerImpl = new InteractionListenerImpl(nodeFactory, getGeoNamesService(), getLogger());
    LabeledCSVParser parser = parserFactory.createParser(getDataset().getResourceURI("/interactions.tsv").toString(), "UTF-8");
    parser.changeDelimiter('\t');
    while (parser.getLine() != null) {
        final Map<String, String> link = new TreeMap<String, String>();
        // DOIs are stored without any embedded spaces
        final String referenceDoi = StringUtils.replace(parser.getValueByLabel(REFERENCE_DOI), " ", "");
        putNotBlank(link, REFERENCE_DOI, referenceDoi);
        putNotBlank(link, REFERENCE_CITATION, CSVTSVUtil.valueOrNull(parser, REFERENCE_CITATION));
        putNotBlank(link, REFERENCE_URL, CSVTSVUtil.valueOrNull(parser, REFERENCE_URL));
        putNotBlank(link, STUDY_SOURCE_CITATION, CitationUtil.sourceCitationLastAccessed(getDataset(), sourceCitation == null ? "" : sourceCitation + ". "));
        putNotBlank(link, SOURCE_TAXON_ID, StringUtils.trimToNull(parser.getValueByLabel(SOURCE_TAXON_ID)));
        putNotBlank(link, SOURCE_TAXON_NAME, StringUtils.trim(parser.getValueByLabel(SOURCE_TAXON_NAME)));
        putNotBlank(link, TARGET_TAXON_ID, StringUtils.trimToNull(parser.getValueByLabel(TARGET_TAXON_ID)));
        putNotBlank(link, TARGET_TAXON_NAME, StringUtils.trim(parser.getValueByLabel(TARGET_TAXON_NAME)));
        putNotBlank(link, INTERACTION_TYPE_ID, StringUtils.trim(parser.getValueByLabel(INTERACTION_TYPE_ID)));
        putNotBlank(link, DECIMAL_LATITUDE, StringUtils.trim(parser.getValueByLabel(DECIMAL_LATITUDE)));
        putNotBlank(link, DECIMAL_LONGITUDE, StringUtils.trim(parser.getValueByLabel(DECIMAL_LONGITUDE)));
        putNotBlank(link, LOCALITY_ID, StringUtils.trim(parser.getValueByLabel(LOCALITY_ID)));
        putNotBlank(link, SOURCE_BODY_PART_ID, StringUtils.trim(parser.getValueByLabel(SOURCE_BODY_PART_ID)));
        putNotBlank(link, SOURCE_BODY_PART_NAME, StringUtils.trim(parser.getValueByLabel(SOURCE_BODY_PART_NAME)));
        // target body part comes from the target columns (previously read from the source columns by mistake)
        putNotBlank(link, TARGET_BODY_PART_ID, StringUtils.trim(parser.getValueByLabel(TARGET_BODY_PART_ID)));
        putNotBlank(link, TARGET_BODY_PART_NAME, StringUtils.trim(parser.getValueByLabel(TARGET_BODY_PART_NAME)));
        attemptToGenerateReferencePropertiesIfMissing(namespace, link);
        interactionListenerImpl.newLink(link);
    }
}
Also used : LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) TreeMap(java.util.TreeMap)

Example 4 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForWebOfLife method importNetworks.

/**
 * Imports all networks contained in a zip archive.
 *
 * <p>The archive is expected to contain a {@code references.csv} entry plus one or more other
 * {@code *.csv} entries holding network data. Each entry is first saved to a temporary file.
 * Then, for every row of {@code references.csv}, a study is created from the "Reference"
 * column and the network file whose entry name (with ".csv" removed) equals the "ID" column
 * is imported via {@code importNetwork}.
 *
 * <p>NOTE(review): the temporary files are not deleted by this method; confirm whether
 * cleanup happens elsewhere.
 *
 * @param archiveURL     location of the zip archive, resolved through {@code getDataset()}
 * @param sourceCitation source citation passed on to every created study
 * @throws StudyImporterException if the archive cannot be read, {@code references.csv} or the
 *                                network files are missing, a reference is blank, a network id
 *                                has no matching file, or a study cannot be created
 */
public void importNetworks(String archiveURL, String sourceCitation) throws StudyImporterException {
    try {
        File referencesTempFile = null;
        Map<String, File> networkTempFileMap = new HashMap<String, File>();

        InputStream inputStream = getDataset().getResource(archiveURL);
        ZipInputStream zipInputStream = new ZipInputStream(inputStream);
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))references\\.csv$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches(".*\\.csv$")) {
                    networkTempFileMap.put(entry.getName().replace(".csv", ""), FileUtils.saveToTmpFile(zipInputStream, entry));
                } else {
                    // drain entries that are of no interest
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // release the archive stream even when reading an entry fails
            IOUtils.closeQuietly(zipInputStream);
        }

        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.csv] resource in [" + archiveURL + "]");
        }
        if (networkTempFileMap.size() == 0) {
            throw new StudyImporterException("failed to find expected network csv files");
        }

        BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(new FileInputStream(referencesTempFile), CharsetConstant.UTF8);
        try {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
            while (parser.getLine() != null) {
                final String citation = parser.getValueByLabel("Reference");
                if (StringUtils.isBlank(citation)) {
                    throw new StudyImporterException("found missing reference");
                }
                final String networkId = parser.getValueByLabel("ID");
                if (!networkTempFileMap.containsKey(networkId)) {
                    throw new StudyImporterException("found network id [" + networkId + "], but no associated data.");
                }
                final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("bascompte:" + citation, sourceCitation, null, citation));
                importNetwork(parseInteractionType(parser), parseLocation(parser), study, networkTempFileMap.get(networkId));
            }
        } finally {
            // the references reader was previously never closed
            IOUtils.closeQuietly(assocReader);
        }
    } catch (IOException e) {
        throw new StudyImporterException(e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException(e);
    }
}
Also used : Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) File(java.io.File) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Example 5 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForRoopnarine method buildGuildLookup.

/**
 * Builds a lookup from trophic guild number to the taxon names listed for that guild.
 *
 * <p>Each row of the given resource contributes its trimmed "Taxa" value to the list of the
 * guild number obtained from {@code parseGuildNumber}.
 *
 * @param trophicGuildLookup resource holding the guild table
 * @return map of guild number to taxon names, in the order the rows were read
 * @throws StudyImporterException if the resource cannot be read or a row has no "Taxa" value
 */
private Map<Integer, List<String>> buildGuildLookup(String trophicGuildLookup) throws StudyImporterException {
    final Map<Integer, List<String>> speciesByGuild = new HashMap<Integer, List<String>>();
    try {
        final LabeledCSVParser guildTable = parserFactory.createParser(trophicGuildLookup, CharsetConstant.UTF8);
        for (String[] row = guildTable.getLine(); row != null; row = guildTable.getLine()) {
            final Integer guild = parseGuildNumber(trophicGuildLookup, guildTable);
            final String taxon = guildTable.getValueByLabel("Taxa");
            if (taxon == null) {
                throw new StudyImporterException("missing or empty Taxa field");
            }
            // lazily create the bucket the first time a guild number is encountered
            if (!speciesByGuild.containsKey(guild)) {
                speciesByGuild.put(guild, new ArrayList<String>());
            }
            speciesByGuild.get(guild).add(taxon.trim());
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to read trophic guild lookup [" + trophicGuildLookup + "]", e);
    }
    return speciesByGuild;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser): 82, IOException (java.io.IOException): 40, Test (org.junit.Test): 31, Study (org.eol.globi.domain.Study): 24, StudyImpl (org.eol.globi.domain.StudyImpl): 17, Specimen (org.eol.globi.domain.Specimen): 15, HashMap (java.util.HashMap): 13, ArrayList (java.util.ArrayList): 12, Location (org.eol.globi.domain.Location): 12, TaxonImpl (org.eol.globi.domain.TaxonImpl): 12, CSVParser (com.Ostermiller.util.CSVParser): 10, StringReader (java.io.StringReader): 8, LocationImpl (org.eol.globi.domain.LocationImpl): 8, Taxon (org.eol.globi.domain.Taxon): 8, InteractType (org.eol.globi.domain.InteractType): 7, File (java.io.File): 6, FileInputStream (java.io.FileInputStream): 6, InputStream (java.io.InputStream): 6, Date (java.util.Date): 6, List (java.util.List): 6