Use of org.mapdb.HTreeMap in project eol-globi-data by jhpoelen.
From the class StudyImporterForSeltmann, method importStudy.
/**
 * Imports interactions from the dataset's "archive" resource.
 *
 * <p>The archive is read as a zip; the entries named {@code associatedTaxa.tsv} and
 * {@code occurrences.tsv} are saved to temporary files. Associated-taxa rows are indexed
 * by their {@code dwc:coreid} in a MapDB hash map, after which every occurrence row that
 * has a matching association is turned into an interaction.
 *
 * @throws StudyImporterException if no archive resource is configured, if either expected
 *                                entry is missing from the archive, or if reading the data
 *                                or creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    final String archiveURL = DatasetUtil.getNamedResourceURI(getDataset(), "archive");
    if (org.apache.commons.lang.StringUtils.isBlank(archiveURL)) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }

    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<String, Map<String, String>> assocMap = db.createHashMap("assocMap").make();

        File assocTempFile = null;
        File occTempFile = null;
        final ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"));
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))associatedTaxa.tsv$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))occurrences.tsv$")) {
                    occTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // drain entries we are not interested in
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // close even when extraction fails part-way through the archive
            IOUtils.closeQuietly(zipInputStream);
        }

        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [associatedTaxa.tsv] resource");
        }
        if (occTempFile == null) {
            throw new StudyImporterException("failed to find expected [occurrences.tsv] resource");
        }

        indexAssociatedTaxa(assocTempFile, assocMap);
        importOccurrences(occTempFile, assocMap);
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // always release the MapDB store, including on the failure paths above
        db.close();
    }
}

/**
 * Reads the tab-separated associated-taxa file and stores the selected columns of each
 * row in {@code assocMap}, keyed by the row's {@code dwc:coreid}. Rows with a blank
 * core id are logged and skipped.
 *
 * @param assocTempFile local copy of {@code associatedTaxa.tsv}
 * @param assocMap      map that receives one property map per core id
 * @throws IOException if the file cannot be read
 */
private void indexAssociatedTaxa(File assocTempFile, Map<String, Map<String, String>> assocMap) throws IOException {
    final FileInputStream assocStream = new FileInputStream(assocTempFile);
    try {
        BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(assocStream, CharsetConstant.UTF8);
        LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
        parser.changeDelimiter('\t');
        while (parser.getLine() != null) {
            Map<String, String> prop = new HashMap<String, String>();
            addKeyValue(parser, prop, "dwc:coreid");
            addKeyValue(parser, prop, "dwc:basisOfRecord");
            addKeyValue(parser, prop, FIELD_IDIGBIO_RECORD_ID);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_GENUS);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SPECIFIC_EPITHET);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SCIENTIFIC_NAME);
            // NOTE(review): "dwc:basisOfRecord" is added a second time here; presumably redundant - confirm against addKeyValue
            addKeyValue(parser, prop, "dwc:basisOfRecord");
            addKeyValue(parser, prop, "aec:associatedRelationshipTerm");
            addKeyValue(parser, prop, "aec:associatedRelationshipURI");
            addKeyValue(parser, prop, "aec:associatedLocationOnHost");
            addKeyValue(parser, prop, "aec:associatedEmergenceVerbatimDate");
            String coreId = parser.getValueByLabel("dwc:coreid");
            if (StringUtils.isBlank(coreId)) {
                // parenthesized so that 1 is added to the number rather than appended as text
                LOG.warn("no coreid for line [" + (parser.getLastLineNumber() + 1) + "]");
            } else {
                assocMap.put(coreId, prop);
            }
        }
    } finally {
        IOUtils.closeQuietly(assocStream);
    }
}

/**
 * Reads the tab-separated occurrences file and creates an interaction for every row
 * whose record id has an entry in {@code assocMap} and for which an interaction type
 * and non-blank source and target names are available. Problems with individual rows
 * are reported as warnings and do not abort the import.
 *
 * @param occTempFile local copy of {@code occurrences.tsv}
 * @param assocMap    associated-taxa properties keyed by core id
 * @throws IOException            if the file cannot be read
 * @throws NodeFactoryException   if a study cannot be retrieved or created
 * @throws StudyImporterException if a called helper reports an import failure
 */
private void importOccurrences(File occTempFile, Map<String, Map<String, String>> assocMap)
        throws IOException, NodeFactoryException, StudyImporterException {
    // the formatter does not depend on the row, so build it once
    final DateTimeFormatter eventDateFormat = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
    final FileInputStream occStream = new FileInputStream(occTempFile);
    try {
        LabeledCSVParser occurrence = CSVTSVUtil.createLabeledCSVParser(occStream);
        occurrence.changeDelimiter('\t');
        while (occurrence.getLine() != null) {
            String references = occurrence.getValueByLabel("dcterms:references");
            Study study = nodeFactory.getOrCreateStudy(new StudyImpl("seltmann" + references, CitationUtil.sourceCitationLastAccessed(this.getDataset(), references), null, references));
            String recordId = occurrence.getValueByLabel(FIELD_IDIGBIO_RECORD_ID);
            Map<String, String> assoc = assocMap.get(recordId);
            if (assoc == null) {
                continue;
            }

            String targetName = getTargetNameFromAssocMap(assoc);
            String sourceName = occurrence.getValueByLabel("scientificName");
            String eventDate = occurrence.getValueByLabel("eventDate");

            Date date = null;
            if (StringUtils.isBlank(eventDate) || StringUtils.equals(eventDate, "0000-00-00")) {
                getLogger().warn(study, "found suspicious event date [" + eventDate + "]" + getLineMsg(occurrence));
            } else {
                // keep the part before the first "/"; the limit of 2 guarantees a first
                // element even for input such as "/", where split("/") returns an empty array
                String dateString = eventDate.split("/", 2)[0];
                try {
                    date = eventDateFormat.parseDateTime(dateString).toDate();
                } catch (IllegalArgumentException e) {
                    getLogger().warn(study, "invalid date [" + dateString + "] " + getLineMsg(occurrence));
                }
            }

            if (StringUtils.isBlank(sourceName)) {
                getLogger().warn(study, "found blank source taxon name" + getLineMsg(occurrence));
            }
            if (StringUtils.isBlank(targetName)) {
                getLogger().warn(study, "found blank associated target taxon name" + getLineMsg(occurrence));
            }

            InteractType interactType = parseInteractType(occurrence, assoc);
            if (interactType != null && StringUtils.isNotBlank(sourceName) && StringUtils.isNotBlank(targetName)) {
                try {
                    createInteraction(occurrence, study, assoc, targetName, sourceName, date, interactType);
                } catch (NodeFactoryException ex) {
                    // a single failing row is reported but does not stop the import
                    String message = "failed to import interaction because of [" + ex.getMessage() + "]" + getLineMsg(occurrence);
                    LOG.warn(message);
                    getLogger().warn(study, message);
                }
            }
        }
    } finally {
        IOUtils.closeQuietly(occStream);
    }
}
Aggregations