Search in sources :

Example 1 with HTreeMap

Use of org.mapdb.HTreeMap in the project eol-globi-data by jhpoelen.

The method importStudy of the class StudyImporterForSeltmann.

/**
 * Imports interactions from the dataset resource named "archive".
 *
 * <p>The archive is read as a zip stream. Entries whose names end in
 * {@code associatedTaxa.tsv} and {@code occurrences.tsv} are saved to temporary
 * files; every other entry is drained and ignored. Rows of the association file
 * are indexed by their {@code dwc:coreid} value in a MapDB hash map. Each
 * occurrence row is then matched against that index using its
 * {@code FIELD_IDIGBIO_RECORD_ID} value, and an interaction is created when an
 * interaction type can be parsed and both source and target names are non-blank.
 *
 * <p>All streams opened here and the MapDB instance are released in a
 * {@code finally} block, so they are closed on failure as well as on success.
 *
 * <p>NOTE(review): the temporary files returned by {@code FileUtils.saveToTmpFile}
 * are not deleted here; whether that helper schedules their cleanup is not visible
 * from this method — confirm.
 *
 * @throws StudyImporterException if no "archive" resource URI is configured, if
 *         either expected archive entry is missing, or if an {@link IOException}
 *         or {@code NodeFactoryException} occurs while importing
 */
@Override
public void importStudy() throws StudyImporterException {
    final String archiveURL = DatasetUtil.getNamedResourceURI(getDataset(), "archive");
    if (org.apache.commons.lang.StringUtils.isBlank(archiveURL)) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    // Declared outside the try so the finally block can release whatever was opened.
    ZipInputStream zipInputStream = null;
    BufferedReader assocReader = null;
    FileInputStream occurrenceStream = null;
    try {
        final HTreeMap<String, Map<String, String>> assocMap = db.createHashMap("assocMap").make();

        // Pass 1: pull the two files of interest out of the archive.
        zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        ZipEntry entry;
        File assocTempFile = null;
        File occTempFile = null;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            if (entry.getName().matches("(^|(.*/))associatedTaxa.tsv$")) {
                assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
            } else if (entry.getName().matches("(^|(.*/))occurrences.tsv$")) {
                occTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
            } else {
                // Drain entries that are not needed.
                IOUtils.copy(zipInputStream, new NullOutputStream());
            }
        }
        // Release the archive stream early; the finally block repeats this as a safety net.
        IOUtils.closeQuietly(zipInputStream);
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [associatedTaxa.tsv] resource");
        }
        if (occTempFile == null) {
            throw new StudyImporterException("failed to find expected [occurrences.tsv] resource");
        }

        // Pass 2: index association rows by their core id.
        assocReader = FileUtils.getUncompressedBufferedReader(new FileInputStream(assocTempFile), CharsetConstant.UTF8);
        LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
        parser.changeDelimiter('\t');
        while (parser.getLine() != null) {
            Map<String, String> prop = new HashMap<String, String>();
            addKeyValue(parser, prop, "dwc:coreid");
            addKeyValue(parser, prop, "dwc:basisOfRecord");
            addKeyValue(parser, prop, FIELD_IDIGBIO_RECORD_ID);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_GENUS);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SPECIFIC_EPITHET);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SCIENTIFIC_NAME);
            addKeyValue(parser, prop, "aec:associatedRelationshipTerm");
            addKeyValue(parser, prop, "aec:associatedRelationshipURI");
            addKeyValue(parser, prop, "aec:associatedLocationOnHost");
            addKeyValue(parser, prop, "aec:associatedEmergenceVerbatimDate");
            String coreId = parser.getValueByLabel("dwc:coreid");
            if (StringUtils.isBlank(coreId)) {
                // Parenthesised so the offset is added arithmetically rather than
                // appended to the message as the character '1'.
                LOG.warn("no coreid for line [" + (parser.getLastLineNumber() + 1) + "]");
            } else {
                assocMap.put(coreId, prop);
            }
        }

        // Pass 3: walk the occurrences and create an interaction for each matched row.
        occurrenceStream = new FileInputStream(occTempFile);
        LabeledCSVParser occurrence = CSVTSVUtil.createLabeledCSVParser(occurrenceStream);
        occurrence.changeDelimiter('\t');
        // Built once and reused for every row instead of once per row.
        final DateTimeFormatter eventDateFormat = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
        while (occurrence.getLine() != null) {
            String references = occurrence.getValueByLabel("dcterms:references");
            Study study = nodeFactory.getOrCreateStudy(new StudyImpl("seltmann" + references, CitationUtil.sourceCitationLastAccessed(this.getDataset(), references), null, references));
            String recordId = occurrence.getValueByLabel(FIELD_IDIGBIO_RECORD_ID);
            Map<String, String> assoc = assocMap.get(recordId);
            if (assoc == null) {
                // No association recorded for this occurrence; nothing to import.
                continue;
            }
            String targetName = getTargetNameFromAssocMap(assoc);
            String sourceName = occurrence.getValueByLabel("scientificName");
            String eventDate = occurrence.getValueByLabel("eventDate");
            Date date = null;
            if (StringUtils.isBlank(eventDate) || StringUtils.equals(eventDate, "0000-00-00")) {
                getLogger().warn(study, "found suspicious event date [" + eventDate + "]" + getLineMsg(occurrence));
            } else {
                // Only the part before the first '/' is parsed. indexOf/substring is used
                // instead of split("/")[0], which throws when the value is only slashes.
                int slash = eventDate.indexOf('/');
                String dateString = slash < 0 ? eventDate : eventDate.substring(0, slash);
                try {
                    date = eventDateFormat.parseDateTime(dateString).toDate();
                } catch (IllegalArgumentException e) {
                    getLogger().warn(study, "invalid date [" + dateString + "] " + getLineMsg(occurrence));
                }
            }
            if (StringUtils.isBlank(sourceName)) {
                getLogger().warn(study, "found blank source taxon name" + getLineMsg(occurrence));
            }
            if (StringUtils.isBlank(targetName)) {
                getLogger().warn(study, "found blank associated target taxon name" + getLineMsg(occurrence));
            }
            InteractType interactType = parseInteractType(occurrence, assoc);
            if (interactType != null && StringUtils.isNotBlank(sourceName) && StringUtils.isNotBlank(targetName)) {
                try {
                    createInteraction(occurrence, study, assoc, targetName, sourceName, date, interactType);
                } catch (NodeFactoryException ex) {
                    // A single failing row is logged and skipped; the import continues.
                    String message = "failed to import interaction because of [" + ex.getMessage() + "]" + getLineMsg(occurrence);
                    LOG.warn(message);
                    getLogger().warn(study, message);
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        IOUtils.closeQuietly(zipInputStream);
        IOUtils.closeQuietly(assocReader);
        IOUtils.closeQuietly(occurrenceStream);
        db.close();
    }
}
Also used : InteractType(org.eol.globi.domain.InteractType) Study(org.eol.globi.domain.Study) HashMap(java.util.HashMap) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Date(java.util.Date) ZipInputStream(java.util.zip.ZipInputStream) BufferedReader(java.io.BufferedReader) HashMap(java.util.HashMap) Map(java.util.Map) HTreeMap(org.mapdb.HTreeMap) File(java.io.File) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) DB(org.mapdb.DB) NullOutputStream(org.apache.commons.io.output.NullOutputStream)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser)1 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ZipEntry (java.util.zip.ZipEntry)1 ZipInputStream (java.util.zip.ZipInputStream)1 NullOutputStream (org.apache.commons.io.output.NullOutputStream)1 InteractType (org.eol.globi.domain.InteractType)1 Study (org.eol.globi.domain.Study)1 StudyImpl (org.eol.globi.domain.StudyImpl)1 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)1 DB (org.mapdb.DB)1 HTreeMap (org.mapdb.HTreeMap)1