use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForPlanque method importStudy.
/**
 * Imports the study from three tab-separated resources: the links resource
 * ({@code getLinks()}), the references resource ({@code getReferences()}) and the
 * references-for-links resource ({@code getReferencesForLinks()}).
 *
 * <p>Steps:
 * <ol>
 *   <li>open the links resource;</li>
 *   <li>build a map from AUTHOR_YEAR to FULL_REFERENCE;</li>
 *   <li>group the AUTHOR_YEAR values of the references-for-links resource by PWKEY;</li>
 *   <li>resolve every AUTHOR_YEAR to its full reference, per PWKEY;</li>
 *   <li>pass every links line accepted by {@code importFilter} to {@code importLine}.</li>
 * </ol>
 *
 * @throws StudyImporterException if a resource cannot be read, or if an AUTHOR_YEAR
 *                                listed for a PWKEY has no (non-blank) full reference
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser dataParser;
    try {
        dataParser = parserFactory.createParser(getLinks(), CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + getLinks() + "]", e);
    }
    dataParser.changeDelimiter('\t');

    Map<String, String> authorYearToFullReference = ReferenceUtil.buildRefMap(parserFactory, getReferences(), "AUTHOR_YEAR", "FULL_REFERENCE", '\t');

    // Group AUTHOR_YEAR values by PWKEY; rows with a blank key or blank author/year are skipped.
    Map<String, List<String>> pairwiseKeyToAuthorYears = new TreeMap<String, List<String>>();
    try {
        LabeledCSVParser referenceParser = parserFactory.createParser(getReferencesForLinks(), CharsetConstant.UTF8);
        referenceParser.changeDelimiter('\t');
        while (referenceParser.getLine() != null) {
            String pairwiseKey = referenceParser.getValueByLabel("PWKEY");
            String authorYear = referenceParser.getValueByLabel("AUTHOR_YEAR");
            if (StringUtils.isNotBlank(pairwiseKey) && StringUtils.isNotBlank(authorYear)) {
                List<String> authorYears = pairwiseKeyToAuthorYears.get(pairwiseKey);
                if (authorYears == null) {
                    authorYears = new ArrayList<String>();
                    pairwiseKeyToAuthorYears.put(pairwiseKey, authorYears);
                }
                authorYears.add(authorYear);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to import [" + getReferencesForLinks() + "]", e);
    }

    // Resolve each AUTHOR_YEAR to its full reference; an unresolvable entry aborts the import.
    Map<String, List<String>> pairwiseKeyToFullCitation = new TreeMap<String, List<String>>();
    for (Map.Entry<String, List<String>> entry : pairwiseKeyToAuthorYears.entrySet()) {
        String pairwiseKey = entry.getKey();
        List<String> authorYearList = entry.getValue();
        if (CollectionUtils.isEmpty(authorYearList)) {
            throw new StudyImporterException("found no AUTHOR_YEAR for PWKEY: [" + pairwiseKey + "]");
        }
        List<String> references = new ArrayList<String>();
        for (String authorYear : authorYearList) {
            String reference = authorYearToFullReference.get(authorYear);
            if (StringUtils.isBlank(reference)) {
                // Report the AUTHOR_YEAR that failed to resolve, not the pairwise key twice.
                throw new StudyImporterException("found no FULL_CITATION for PWKEY: [" + pairwiseKey + "] and AUTHOR_YEAR [" + authorYear + "]");
            }
            references.add(reference);
        }
        pairwiseKeyToFullCitation.put(pairwiseKey, references);
    }

    try {
        while (dataParser.getLine() != null) {
            if (importFilter.shouldImportRecord((long) dataParser.getLastLineNumber())) {
                importLine(dataParser, pairwiseKeyToFullCitation);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem importing study at line [" + dataParser.getLastLineNumber() + "]", e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForRobledo method importStudy.
/**
 * Imports beetle/plant interactions from {@code robledo/table_s1_extract.csv}.
 *
 * <p>For every row a specimen is created for the beetle named in the
 * "Herbivore species" column. For every plant abbreviation returned by
 * {@code buildPlantLookup()}, the matching column is parsed as an integer; when it
 * is greater than zero a plant specimen is created and recorded as eaten by the
 * beetle. Cells that do not parse as an integer are logged as warnings.
 * All specimens are placed at one fixed location.
 *
 * @throws StudyImporterException if the location or a specimen cannot be created,
 *                                or if the resource cannot be read
 */
@Override
public void importStudy() throws StudyImporterException {
    final String citation = "García-Robledo C, Erickson DL, Staines CL, Erwin TL, Kress WJ. Tropical Plant–Herbivore Networks: Reconstructing Species Interactions Using DNA Barcodes Heil M, editor. PLoS ONE [Internet]. 2013 January 8;8(1):e52967. Available from: http://dx.doi.org/10.1371/journal.pone.0052967";
    final Study study = nodeFactory.getOrCreateStudy(
            new StudyImpl("García-Robledo et al 2013", citation, "http://dx.doi.org/10.1371/journal.pone.0052967", citation));

    final Map<String, String> plantNameByAbbreviation = buildPlantLookup();

    // spatial location from: http://www.ots.ac.cr/index.php?option=com_content&task=view&id=163&Itemid=348
    final Double lat = LocationUtil.parseDegrees("10°26'N");
    final Double lng = LocationUtil.parseDegrees("83°59'W");
    final Location site;
    try {
        site = nodeFactory.getOrCreateLocation(new LocationImpl(lat, lng, 35.0, null));
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create location", e);
    }

    // TODO: need to map date range of collections
    final String studyResource = "robledo/table_s1_extract.csv";
    try {
        final LabeledCSVParser csv = parserFactory.createParser(studyResource, CharsetConstant.UTF8);
        while (csv.getLine() != null) {
            final String beetleScientificName = completeBeetleName(csv.getValueByLabel("Herbivore species"));
            final Specimen beetle = nodeFactory.createSpecimen(study, new TaxonImpl(beetleScientificName, null));
            beetle.caughtIn(site);

            for (Map.Entry<String, String> plantEntry : plantNameByAbbreviation.entrySet()) {
                final String plantAbbreviation = plantEntry.getKey();
                final String plantScientificName = plantEntry.getValue();
                final String cell = csv.getValueByLabel(plantAbbreviation);
                try {
                    if (Integer.parseInt(cell) <= 0) {
                        // zero or negative code: no interaction recorded for this plant
                        continue;
                    }
                    final Specimen plant = nodeFactory.createSpecimen(study, new TaxonImpl(plantScientificName, null));
                    plant.caughtIn(site);
                    beetle.ate(plant);
                } catch (NumberFormatException ex) {
                    getLogger().warn(study, "malformed or no value [" + cell + "] found for ["
                            + plantScientificName + "(" + plantAbbreviation + ")" + "] and beetle ["
                            + beetleScientificName + "] could be found in ["
                            + studyResource + ":" + csv.lastLineNumber() + "]");
                }
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem reading [" + studyResource + "]", e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("cannot create specimens from [" + studyResource + "]", e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForTSV method importRepository.
/**
 * Reads the dataset's {@code /interactions.tsv} resource (tab-separated, UTF-8) and
 * hands one property map per row to an {@code InteractionListenerImpl}.
 *
 * <p>Only non-blank values are copied into the map (via {@code putNotBlank}). Spaces
 * are stripped from the reference DOI. Missing reference properties are completed by
 * {@code attemptToGenerateReferencePropertiesIfMissing} before the link is emitted.
 *
 * @param namespace      passed to {@code attemptToGenerateReferencePropertiesIfMissing}
 * @param sourceCitation citation of the source; when non-null it is suffixed with ". "
 *                       before being passed to {@code CitationUtil.sourceCitationLastAccessed}
 * @throws IOException            if the resource cannot be read
 * @throws StudyImporterException if a link cannot be processed
 */
private void importRepository(String namespace, String sourceCitation) throws IOException, StudyImporterException {
    InteractionListenerImpl interactionListenerImpl = new InteractionListenerImpl(nodeFactory, getGeoNamesService(), getLogger());
    LabeledCSVParser parser = parserFactory.createParser(getDataset().getResourceURI("/interactions.tsv").toString(), "UTF-8");
    parser.changeDelimiter('\t');
    while (parser.getLine() != null) {
        final Map<String, String> link = new TreeMap<String, String>();

        // reference / citation properties
        final String referenceDoi = StringUtils.replace(parser.getValueByLabel(REFERENCE_DOI), " ", "");
        putNotBlank(link, REFERENCE_DOI, referenceDoi);
        putNotBlank(link, REFERENCE_CITATION, CSVTSVUtil.valueOrNull(parser, REFERENCE_CITATION));
        putNotBlank(link, REFERENCE_URL, CSVTSVUtil.valueOrNull(parser, REFERENCE_URL));
        putNotBlank(link, STUDY_SOURCE_CITATION, CitationUtil.sourceCitationLastAccessed(getDataset(), sourceCitation == null ? "" : sourceCitation + ". "));

        // taxa and interaction type
        putNotBlank(link, SOURCE_TAXON_ID, StringUtils.trimToNull(parser.getValueByLabel(SOURCE_TAXON_ID)));
        putNotBlank(link, SOURCE_TAXON_NAME, StringUtils.trim(parser.getValueByLabel(SOURCE_TAXON_NAME)));
        putNotBlank(link, TARGET_TAXON_ID, StringUtils.trimToNull(parser.getValueByLabel(TARGET_TAXON_ID)));
        putNotBlank(link, TARGET_TAXON_NAME, StringUtils.trim(parser.getValueByLabel(TARGET_TAXON_NAME)));
        putNotBlank(link, INTERACTION_TYPE_ID, StringUtils.trim(parser.getValueByLabel(INTERACTION_TYPE_ID)));

        // location
        putNotBlank(link, DECIMAL_LATITUDE, StringUtils.trim(parser.getValueByLabel(DECIMAL_LATITUDE)));
        putNotBlank(link, DECIMAL_LONGITUDE, StringUtils.trim(parser.getValueByLabel(DECIMAL_LONGITUDE)));
        putNotBlank(link, LOCALITY_ID, StringUtils.trim(parser.getValueByLabel(LOCALITY_ID)));

        // body parts: each side is read from its own column
        putNotBlank(link, SOURCE_BODY_PART_ID, StringUtils.trim(parser.getValueByLabel(SOURCE_BODY_PART_ID)));
        putNotBlank(link, SOURCE_BODY_PART_NAME, StringUtils.trim(parser.getValueByLabel(SOURCE_BODY_PART_NAME)));
        putNotBlank(link, TARGET_BODY_PART_ID, StringUtils.trim(parser.getValueByLabel(TARGET_BODY_PART_ID)));
        putNotBlank(link, TARGET_BODY_PART_NAME, StringUtils.trim(parser.getValueByLabel(TARGET_BODY_PART_NAME)));

        attemptToGenerateReferencePropertiesIfMissing(namespace, link);
        interactionListenerImpl.newLink(link);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForWebOfLife method importNetworks.
/**
 * Imports all networks contained in a zip archive.
 *
 * <p>The archive is scanned once: an entry whose name is {@code references.csv}
 * (at the root or in any directory) is saved as the references file, every other
 * {@code *.csv} entry is saved as a network file keyed by its entry name with
 * {@code ".csv"} removed, and all remaining entries are drained and ignored.
 * Each row of the references file must then provide a non-blank "Reference" and an
 * "ID" matching one of the network keys; for each row a study is created and the
 * matching network file is imported.
 *
 * @param archiveURL     resource location of the zip archive, resolved through the dataset
 * @param sourceCitation source citation stored on every created study
 * @throws StudyImporterException if the archive lacks a references file or network
 *                                files, a reference row is incomplete or refers to an
 *                                unknown network, or reading/creating nodes fails
 */
public void importNetworks(String archiveURL, String sourceCitation) throws StudyImporterException {
    try {
        File referencesTempFile = null;
        Map<String, File> networkTempFileMap = new HashMap<String, File>();

        ZipInputStream zipInputStream = null;
        try {
            zipInputStream = new ZipInputStream(getDataset().getResource(archiveURL));
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                if (entry.getName().matches("(^|(.*/))references\\.csv$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entry.getName().matches(".*\\.csv$")) {
                    networkTempFileMap.put(entry.getName().replace(".csv", ""), FileUtils.saveToTmpFile(zipInputStream, entry));
                } else {
                    // drain entries that are not of interest
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        } finally {
            // close the archive even when extraction fails half-way
            IOUtils.closeQuietly(zipInputStream);
        }

        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.csv] resource in [" + archiveURL + "]");
        }
        if (networkTempFileMap.isEmpty()) {
            throw new StudyImporterException("failed to find expected network csv files");
        }

        BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(new FileInputStream(referencesTempFile), CharsetConstant.UTF8);
        try {
            LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
            while (parser.getLine() != null) {
                final String citation = parser.getValueByLabel("Reference");
                if (StringUtils.isBlank(citation)) {
                    throw new StudyImporterException("found missing reference");
                }
                final String networkId = parser.getValueByLabel("ID");
                if (!networkTempFileMap.containsKey(networkId)) {
                    throw new StudyImporterException("found network id [" + networkId + "], but no associated data.");
                }
                final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("bascompte:" + citation, sourceCitation, null, citation));
                importNetwork(parseInteractionType(parser), parseLocation(parser), study, networkTempFileMap.get(networkId));
            }
        } finally {
            // release the references file handle whether or not the import succeeded
            IOUtils.closeQuietly(assocReader);
        }
    } catch (IOException e) {
        throw new StudyImporterException(e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException(e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForRoopnarine method buildGuildLookup.
/**
 * Reads the given trophic guild lookup resource and groups the trimmed values of
 * its "Taxa" column by the guild number obtained from {@code parseGuildNumber}.
 *
 * @param trophicGuildLookup resource name of the lookup table
 * @return map from guild number to the taxa names listed for that guild, in file order
 * @throws StudyImporterException if a row has no "Taxa" value or the resource cannot be read
 */
private Map<Integer, List<String>> buildGuildLookup(String trophicGuildLookup) throws StudyImporterException {
    final Map<Integer, List<String>> taxaByGuild = new HashMap<Integer, List<String>>();
    try {
        final LabeledCSVParser lookupParser = parserFactory.createParser(trophicGuildLookup, CharsetConstant.UTF8);
        for (String[] row = lookupParser.getLine(); row != null; row = lookupParser.getLine()) {
            final Integer guild = parseGuildNumber(trophicGuildLookup, lookupParser);
            final String taxon = lookupParser.getValueByLabel("Taxa");
            if (taxon == null) {
                throw new StudyImporterException("missing or empty Taxa field");
            }
            List<String> members = taxaByGuild.get(guild);
            if (members == null) {
                // first taxon seen for this guild
                members = new ArrayList<String>();
                taxaByGuild.put(guild, members);
            }
            members.add(taxon.trim());
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to read trophic guild lookup [" + trophicGuildLookup + "]", e);
    }
    return taxaByGuild;
}
Aggregations