use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForBioInfo method importStudy.
/**
 * Imports the study by creating one parser per data file and handing them to
 * {@code buildRefMap}, {@code buildTaxonMap} and {@code createRelations}, in that order.
 *
 * @throws StudyImporterException if an {@link IOException} occurs while any of the
 *                                parsers is created or consumed
 */
@Override
public void importStudy() throws StudyImporterException {
    try {
        final Map<String, String> references =
                buildRefMap(parserFactory.createParser(REFERENCE_DATA_FILE, CharsetConstant.UTF8));
        final Map<String, Taxon> taxa =
                buildTaxonMap(parserFactory.createParser(TAXON_DATA_FILE, CharsetConstant.UTF8));
        final LabeledCSVParser relations =
                parserFactory.createParser(RELATIONS_DATA_FILE, CharsetConstant.UTF8);
        createRelations(relations, references, taxa);
    } catch (IOException ioe) {
        // NOTE(review): the message names the relations file even when the failure came
        // from the reference or taxon file; kept as-is to preserve the emitted text.
        throw new StudyImporterException("problem reading trophic relations file [" + RELATIONS_DATA_FILE + "]", ioe);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class ReferenceUtil method buildRefMap.
/**
 * Reads a delimited reference resource and builds a sorted map from the trimmed value
 * of the key column to the trimmed value of the value column.
 *
 * <p>Rows with a blank key are skipped with a warning. Rows with a blank value fall back
 * to using the key as the value (also with a warning). When a key is seen more than once,
 * the first non-blank mapping wins and later rows are skipped with a warning.
 *
 * @param parserFactory   factory used to open {@code referencePath}
 * @param referencePath   resource to read the references from
 * @param keyColumnName   label of the column holding the short reference (map key)
 * @param valueColumnName label of the column holding the full reference (map value)
 * @param delimiter       column delimiter to apply to the parser
 * @return map of trimmed short reference to trimmed full reference
 * @throws StudyImporterException if the resource cannot be read or closed
 */
protected static Map<String, String> buildRefMap(ParserFactory parserFactory, String referencePath, String keyColumnName, String valueColumnName, char delimiter) throws StudyImporterException {
    Map<String, String> refMap = new TreeMap<String, String>();
    try {
        LabeledCSVParser referenceParser = parserFactory.createParser(referencePath, CharsetConstant.UTF8);
        try {
            referenceParser.changeDelimiter(delimiter);
            while (referenceParser.getLine() != null) {
                String shortReference = referenceParser.getValueByLabel(keyColumnName);
                if (StringUtils.isBlank(shortReference)) {
                    LOG.warn("missing short reference on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]");
                    continue;
                }
                String fullReference = referenceParser.getValueByLabel(valueColumnName);
                if (StringUtils.isBlank(fullReference)) {
                    LOG.warn("missing full reference for [" + shortReference + "] on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]");
                    fullReference = shortReference;
                }
                // Trim once; the same key is used for both the lookup and the insert.
                String key = StringUtils.trim(shortReference);
                if (StringUtils.isBlank(refMap.get(key))) {
                    refMap.put(key, StringUtils.trim(fullReference));
                } else {
                    LOG.warn("skipping [" + shortReference + "] on line [" + referenceParser.lastLineNumber() + "] in [" + referencePath + "]: key already defined.");
                }
            }
        } finally {
            // Release the stream backing the parser even when parsing fails part-way.
            referenceParser.close();
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + referencePath + "]", e);
    }
    return refMap;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForSeltmann method importStudy.
/**
 * Imports interactions from the dataset's "archive" resource, a zip expected to contain
 * an {@code associatedTaxa.tsv} and an {@code occurrences.tsv} entry.
 *
 * <p>The association rows are first indexed by their {@code dwc:coreid} value in an
 * in-memory map. Each occurrence row is then looked up in that index by its
 * {@code FIELD_IDIGBIO_RECORD_ID} value, and an interaction is created when the row has a
 * source name, a target name and a parsable interaction type. Rows with problems are
 * logged and skipped rather than aborting the import.
 *
 * @throws StudyImporterException if no archive URL is configured, an expected archive
 *                                entry is missing, or reading / study creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    final String archiveURL = DatasetUtil.getNamedResourceURI(getDataset(), "archive");
    if (org.apache.commons.lang.StringUtils.isBlank(archiveURL)) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<String, Map<String, String>> assocMap = db.createHashMap("assocMap").make();

        // Step 1: extract the two expected entries from the archive into temp files.
        File assocTempFile = null;
        File occTempFile = null;
        InputStream inputStream = DatasetUtil.getNamedResourceStream(getDataset(), "archive");
        ZipInputStream zipInputStream = new ZipInputStream(inputStream);
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))associatedTaxa.tsv$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches("(^|(.*/))occurrences.tsv$")) {
                    occTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // Drain entries we do not care about.
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // Close on every path, including a failure while reading an entry.
            IOUtils.closeQuietly(zipInputStream);
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [associatedTaxa.tsv] resource");
        }
        if (occTempFile == null) {
            throw new StudyImporterException("failed to find expected [occurrences.tsv] resource");
        }

        // Step 2: index association rows by core id.
        InputStream assocStream = new FileInputStream(assocTempFile);
        try {
            BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(assocStream, CharsetConstant.UTF8);
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
            parser.changeDelimiter('\t');
            while (parser.getLine() != null) {
                Map<String, String> prop = new HashMap<String, String>();
                addKeyValue(parser, prop, "dwc:coreid");
                addKeyValue(parser, prop, "dwc:basisOfRecord");
                addKeyValue(parser, prop, FIELD_IDIGBIO_RECORD_ID);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_GENUS);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_SPECIFIC_EPITHET);
                addKeyValue(parser, prop, FIELD_ASSOCIATED_SCIENTIFIC_NAME);
                // NOTE(review): "dwc:basisOfRecord" is added a second time here; presumably
                // redundant, but kept because addKeyValue is not visible from this block.
                addKeyValue(parser, prop, "dwc:basisOfRecord");
                addKeyValue(parser, prop, "aec:associatedRelationshipTerm");
                addKeyValue(parser, prop, "aec:associatedRelationshipURI");
                addKeyValue(parser, prop, "aec:associatedLocationOnHost");
                addKeyValue(parser, prop, "aec:associatedEmergenceVerbatimDate");
                String coreId = parser.getValueByLabel("dwc:coreid");
                if (StringUtils.isBlank(coreId)) {
                    // Parenthesised so the +1 is arithmetic, not string concatenation.
                    LOG.warn("no coreid for line [" + (parser.getLastLineNumber() + 1) + "]");
                } else {
                    assocMap.put(coreId, prop);
                }
            }
        } finally {
            IOUtils.closeQuietly(assocStream);
        }

        // Step 3: walk occurrences and create interactions for those with an association.
        final DateTimeFormatter eventDateFormat = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
        InputStream occStream = new FileInputStream(occTempFile);
        try {
            LabeledCSVParser occurrence = CSVTSVUtil.createLabeledCSVParser(occStream);
            occurrence.changeDelimiter('\t');
            while (occurrence.getLine() != null) {
                String references = occurrence.getValueByLabel("dcterms:references");
                Study study = nodeFactory.getOrCreateStudy(new StudyImpl("seltmann" + references, CitationUtil.sourceCitationLastAccessed(this.getDataset(), references), null, references));
                String recordId = occurrence.getValueByLabel(FIELD_IDIGBIO_RECORD_ID);
                Map<String, String> assoc = assocMap.get(recordId);
                if (assoc == null) {
                    continue;
                }
                String targetName = getTargetNameFromAssocMap(assoc);
                String sourceName = occurrence.getValueByLabel("scientificName");
                String eventDate = occurrence.getValueByLabel("eventDate");
                Date date = null;
                if (StringUtils.isBlank(eventDate) || StringUtils.equals(eventDate, "0000-00-00")) {
                    getLogger().warn(study, "found suspicious event date [" + eventDate + "]" + getLineMsg(occurrence));
                } else {
                    // Only the part before the first "/" is parsed. substringBefore (unlike
                    // split("/")[0]) cannot throw for values consisting only of separators.
                    String dateString = StringUtils.substringBefore(eventDate, "/");
                    try {
                        date = eventDateFormat.parseDateTime(dateString).toDate();
                    } catch (IllegalArgumentException e) {
                        getLogger().warn(study, "invalid date [" + dateString + "] " + getLineMsg(occurrence));
                    }
                }
                if (StringUtils.isBlank(sourceName)) {
                    getLogger().warn(study, "found blank source taxon name" + getLineMsg(occurrence));
                }
                if (StringUtils.isBlank(targetName)) {
                    getLogger().warn(study, "found blank associated target taxon name" + getLineMsg(occurrence));
                }
                InteractType interactType = parseInteractType(occurrence, assoc);
                if (interactType != null && StringUtils.isNotBlank(sourceName) && StringUtils.isNotBlank(targetName)) {
                    try {
                        createInteraction(occurrence, study, assoc, targetName, sourceName, date, interactType);
                    } catch (NodeFactoryException ex) {
                        // A single bad row is reported and skipped; the import continues.
                        String message = "failed to import interaction because of [" + ex.getMessage() + "]" + getLineMsg(occurrence);
                        LOG.warn(message);
                        getLogger().warn(study, message);
                    }
                }
            }
        } finally {
            IOUtils.closeQuietly(occStream);
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // Release the in-memory store on success and on every failure path.
        db.close();
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForRoopnarine method importTrophicInteractions.
/**
 * Walks every row of {@code studyResource}; for each row that yields a non-empty prey
 * list from {@code importPreyList}, collects the specimens returned by
 * {@code importPredatorSpecimen}.
 *
 * @param trophicGuildLookup             passed through to {@code importPredatorSpecimen}; also named in the I/O error message
 * @param trophicGuildNumberToSpeciesMap passed through to the prey and predator import helpers
 * @param studyResource                  resource the row parser is created from
 * @param study                          study passed through to the import helpers
 * @param location                       location passed through to {@code importPredatorSpecimen}
 * @return all predator specimens collected across rows, in row order
 * @throws StudyImporterException wrapping any I/O, node factory or importer failure
 */
private List<Specimen> importTrophicInteractions(String trophicGuildLookup, Map<Integer, List<String>> trophicGuildNumberToSpeciesMap, String studyResource, Study study, Location location) throws StudyImporterException {
    final List<Specimen> predators = new ArrayList<Specimen>();
    try {
        final LabeledCSVParser rows = parserFactory.createParser(studyResource, CharsetConstant.UTF8);
        while (rows.getLine() != null) {
            final List<String> prey = importPreyList(trophicGuildNumberToSpeciesMap, rows, study);
            if (prey.isEmpty()) {
                // Nothing to link for this row.
                continue;
            }
            final List<Specimen> rowPredators =
                    importPredatorSpecimen(trophicGuildLookup, trophicGuildNumberToSpeciesMap, rows, prey, study, location);
            predators.addAll(rowPredators);
        }
    } catch (IOException ioe) {
        throw new StudyImporterException("failed to read trophic guild lookup [" + trophicGuildLookup + "]", ioe);
    } catch (NodeFactoryException nfe) {
        throw new StudyImporterException("failed to import trophic links [" + studyResource + "]", nfe);
    } catch (StudyImporterException sie) {
        throw new StudyImporterException("failed to import trophic links from resource [" + studyResource + "]", sie);
    }
    return predators;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForWrast method createDepthMap.
/**
 * Builds a lookup from depth id to depth (in metres, per the "Depth (m)" column label)
 * from the {@code LAVACA_BAY_ENVIRONMENTAL} resource.
 *
 * <p>The depth id is derived via {@code createDepthId} from the "Season", "Upper/Lower",
 * "Site" and "Habitat" columns. Rows whose depth is missing or not a number are logged
 * against {@code study} and skipped. A second row for an id that already has a depth is
 * treated as a fatal error.
 *
 * @param study study that parse warnings are logged against
 * @return map of depth id to parsed depth
 * @throws StudyImporterException if the resource cannot be read or closed, or if an
 *                                id that already has a depth occurs again
 */
private Map<String, Double> createDepthMap(Study study) throws StudyImporterException {
    Map<String, Double> depthMap;
    try {
        LabeledCSVParser depthParser = parserFactory.createParser(LAVACA_BAY_ENVIRONMENTAL, CharsetConstant.UTF8);
        try {
            depthMap = new HashMap<>();
            while (depthParser.getLine() != null) {
                String seasonDepth = depthParser.getValueByLabel("Season");
                String regionDepth = depthParser.getValueByLabel("Upper/Lower");
                String siteDepth = depthParser.getValueByLabel("Site");
                String habitatDepth = depthParser.getValueByLabel("Habitat");
                String depthString = depthParser.getValueByLabel("Depth (m)");
                String depthId = createDepthId(seasonDepth, regionDepth, siteDepth, habitatDepth);
                if (depthMap.containsKey(depthId)) {
                    // The message lists the four components that make up the id.
                    throw new StudyImporterException(createMsgPrefix(depthParser) + " found duplicate entries for unique combination of season,region,site and habitat: [" + seasonDepth + ", " + regionDepth + ", " + siteDepth + ", " + habitatDepth + "]");
                }
                Double depth = null;
                // Double.parseDouble(null) throws NullPointerException, not
                // NumberFormatException, so a missing column value is checked explicitly.
                if (depthString != null) {
                    try {
                        depth = Double.parseDouble(depthString);
                    } catch (NumberFormatException ex) {
                        // fall through with depth == null; reported below
                    }
                }
                if (depth == null) {
                    getLogger().warn(study, createMsgPrefix(depthParser) + "failed to parse depth for depthId [" + depthId + "], skipping entry");
                } else {
                    depthMap.put(depthId, depth);
                }
            }
        } finally {
            // Release the stream backing the parser on every path.
            depthParser.close();
        }
    } catch (IOException e1) {
        // Keep the underlying cause so the failure can be diagnosed.
        throw new StudyImporterException("failed to read from [" + LAVACA_BAY_ENVIRONMENTAL + "]", e1);
    }
    return depthMap;
}
Aggregations