Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From class StudyImporterForCoetzer, method importStudy.
/**
 * Imports interactions from the zip archive configured for this dataset.
 *
 * <p>The archive is scanned for {@code taxon.txt}, {@code description.txt},
 * {@code references.txt} and {@code distribution.txt}; each is copied to a
 * temporary file and all four must be present. The taxon and reference files
 * are loaded into in-memory maps keyed by the integer in their first column.
 * Every row of {@code description.txt} whose third column has the form
 * {@code <interaction>:<target>,<target>,...} then yields one interaction per
 * target, provided both a reference and a source taxon name are known for the
 * row's id.
 *
 * @throws StudyImporterException if no archive URI is configured, an expected
 *         archive entry is missing, an interaction label is not supported, or
 *         reading the archive / creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    if (org.apache.commons.lang.StringUtils.isBlank(getResourceArchiveURI())) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    try {
        final HTreeMap<Integer, String> taxonMap = db.createHashMap("taxonMap").make();
        final HTreeMap<Integer, String> refMap = db.createHashMap("refMap").make();

        File taxonTempFile = null;
        File assocTempFile = null;
        File referencesTempFile = null;
        File distributionTempFile = null;
        // try-with-resources: the archive stream is released even when extraction fails midway.
        try (ZipInputStream zipInputStream = new ZipInputStream(DatasetUtil.getNamedResourceStream(getDataset(), "archive"))) {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                final String entryName = entry.getName();
                // Dots are escaped so that only the literal file names match.
                if (entryName.matches("(^|(.*/))taxon\\.txt$")) {
                    taxonTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))description\\.txt$")) {
                    assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))references\\.txt$")) {
                    referencesTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else if (entryName.matches("(^|(.*/))distribution\\.txt$")) {
                    distributionTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
                } else {
                    // Drain entries that are not needed.
                    IOUtils.copy(zipInputStream, new NullOutputStream());
                }
            }
        }

        if (taxonTempFile == null) {
            throw new StudyImporterException("failed to find expected [taxon.txt] resource");
        }
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [description.txt] resource");
        }
        if (referencesTempFile == null) {
            throw new StudyImporterException("failed to find expected [references.txt] resource");
        }
        if (distributionTempFile == null) {
            throw new StudyImporterException("failed to find expected [distribution.txt] resource");
        }

        // taxon id (first column) -> name derived from the whole row
        try (FileInputStream taxonStream = new FileInputStream(taxonTempFile)) {
            BufferedReader taxonReader = FileUtils.getUncompressedBufferedReader(taxonStream, CharsetConstant.UTF8);
            LabeledCSVParser taxonParser = CSVTSVUtil.createLabeledCSVParser(taxonReader);
            taxonParser.changeDelimiter('\t');
            String[] line;
            while ((line = taxonParser.getLine()) != null) {
                taxonMap.put(Integer.parseInt(line[0]), nameFor(line));
            }
        }

        // id (first column) -> reference text (second column)
        try (FileInputStream refsStream = new FileInputStream(referencesTempFile)) {
            LabeledCSVParser refs = CSVTSVUtil.createLabeledCSVParser(refsStream);
            refs.changeDelimiter('\t');
            String[] refsLine;
            while ((refsLine = refs.getLine()) != null) {
                refMap.put(Integer.parseInt(refsLine[0]), refsLine[1]);
            }
        }

        final Map<String, InteractType> interactTypeMap = new HashMap<String, InteractType>();
        interactTypeMap.put("Visits flowers of", InteractType.VISITS_FLOWERS_OF);
        // NOTE(review): "Host of" is mapped to VISITS_FLOWERS_OF, which looks like a
        // copy/paste slip - confirm the intended interaction type before changing it.
        interactTypeMap.put("Host of", InteractType.VISITS_FLOWERS_OF);
        interactTypeMap.put("Parasite of", InteractType.PARASITE_OF);
        interactTypeMap.put("Nests in", InteractType.INTERACTS_WITH);

        try (FileInputStream assocStream = new FileInputStream(assocTempFile)) {
            LabeledCSVParser assoc = CSVTSVUtil.createLabeledCSVParser(assocStream);
            assoc.changeDelimiter('\t');
            String[] assocLine;
            while ((assocLine = assoc.getLine()) != null) {
                final Integer taxonId = Integer.parseInt(assocLine[0]);
                final String[] parts = assocLine[2].split(":");
                if (parts.length <= 1) {
                    // No "<interaction>:<targets>" structure in this description.
                    continue;
                }
                // Both lookups depend only on the row, not on the individual target.
                final String reference = refMap.get(taxonId);
                final String sourceTaxonName = taxonMap.get(taxonId);
                if (StringUtils.isBlank(reference) || StringUtils.isBlank(sourceTaxonName)) {
                    continue;
                }
                final String interactionString = parts[0];
                for (String targetTaxonName : parts[1].split(",")) {
                    // Validate the interaction label before creating any nodes for this target.
                    final InteractType relType = interactTypeMap.get(interactionString);
                    if (relType == null) {
                        throw new StudyImporterException("found unsupported interaction type [" + interactionString + "]");
                    }
                    final Study study = nodeFactory.getOrCreateStudy(new StudyImpl(getSourceCitation() + reference, getSourceCitationLastAccessed(), null, reference));
                    final Specimen source = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(sourceTaxonName), null));
                    final Specimen target = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.trim(targetTaxonName), null));
                    source.interactsWith(target, relType);
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // Release the in-memory store on every exit path, not only on success.
        db.close();
    }
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From class StudyImporterForCook, method importStudy.
/**
 * Imports the Cook 2012 host/parasite records from {@code DATASET_RESOURCE_NAME}.
 *
 * <p>Every row produces one "Micropogonias undulatus" host specimen with a length
 * and collection date, placed at a single fixed sample location; the parasite
 * columns of the row are then handed to {@code addParasites}.
 *
 * @throws StudyImporterException if the resource cannot be read or parsed, a fish
 *         length or date is malformed, or nodes cannot be created
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser parser;
    try {
        parser = parserFactory.createParser(DATASET_RESOURCE_NAME, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource", e);
    }
    String citation = "Cook CW. The Early Life History and Reproductive Biology of Cymothoa excisa, a Marine Isopod Parasitizing Atlantic Croaker, (Micropogonias undulatus), along the Texas Coast. 2012. Master Thesis. Available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.";
    StudyImpl study1 = new StudyImpl("Cook 2012", "Data provided by Colt W. Cook. Also available from http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285.", null, citation);
    study1.setExternalId("http://repositories.lib.utexas.edu/handle/2152/ETD-UT-2012-08-6285");
    Study study = nodeFactory.getOrCreateStudy(study1);
    // Column labels are passed verbatim to addParasites; note that "Iso 4 " carries a trailing space.
    final String[] isoCols = { "Iso 1", "Iso 2", "Iso 3", "Iso 4 ", "Iso 5" };
    try {
        Double latitude = LocationUtil.parseDegrees("27º51'N");
        Double longitude = LocationUtil.parseDegrees("97º8'W");
        Location sampleLocation = nodeFactory.getOrCreateLocation(new LocationImpl(latitude, longitude, -3.0, null));
        try {
            while (parser.getLine() != null) {
                Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl("Micropogonias undulatus", null));
                String fishLength = parser.getValueByLabel("Fish Length");
                final double lengthInMm;
                try {
                    // presumably the column is in cm, hence the factor of ten - TODO confirm
                    lengthInMm = Double.parseDouble(fishLength) * 10.0;
                } catch (NumberFormatException e) {
                    // NumberFormatException is an IllegalArgumentException; without this
                    // handler a bad length would be reported as "failed to parse date".
                    throw new StudyImporterException("failed to parse fish length [" + fishLength + "]", e);
                }
                host.setLengthInMm(lengthInMm);
                String dateString = parser.getValueByLabel("Date");
                Date collectionDate = DateUtil.parsePatternUTC(dateString, "MM/dd/yyyy").toDate();
                nodeFactory.setUnixEpochProperty(host, collectionDate);
                host.caughtIn(sampleLocation);
                for (String isoCol : isoCols) {
                    addParasites(parser, study, sampleLocation, host, collectionDate, isoCol);
                }
            }
        } catch (IOException e) {
            throw new StudyImporterException("failed to parse [" + DATASET_RESOURCE_NAME + "]", e);
        } catch (IllegalArgumentException e) {
            throw new StudyImporterException("failed to parse date", e);
        }
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create host and parasite taxons", e);
    }
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From class StudyImporterForBell, method importStudy.
/**
 * Imports parasite/host records from every resource listed in {@code RESOURCE}.
 *
 * <p>Each row yields a parasite specimen (named from the Genus and Species columns)
 * and a host specimen (named from SCIENTIFIC_NAME). Both share the row's location,
 * date and an external id built from the GUID column. The study is selected by the
 * first key of {@code REFS} that prefixes the GUID; when no key matches, a warning
 * is logged and the source citation is used as the description.
 *
 * @throws StudyImporterException wrapping any failure, with a message obtained from
 *         {@code getErrorMessage(resource, parser)}
 */
@Override
public void importStudy() throws StudyImporterException {
    final String sourceCitation = "Bell, K. C., Matek, D., Demboski, J. R., & Cook, J. A. (2015). Expanded Host Range of Sucking Lice and Pinworms of Western North American Chipmunks. Comparative Parasitology, 82(2), 312–321. doi:10.1654/4756.1 . Data provided by Kayce C. Bell.";
    for (String resource : RESOURCE) {
        LabeledCSVParser parser = null;
        try {
            parser = parserFactory.createParser(resource, "UTF-8");
            while (parser.getLine() != null) {
                final String guid = parser.getValueByLabel("GUID");
                final String specimenUrl = "http://arctos.database.museum/guid/" + guid;

                // The first REFS key that prefixes the GUID identifies the collection.
                String matchedKey = null;
                String matchedDescription = null;
                for (String candidate : REFS.keySet()) {
                    if (!guid.startsWith(candidate)) {
                        continue;
                    }
                    matchedDescription = REFS.get(candidate);
                    matchedKey = candidate;
                    break;
                }
                if (StringUtils.isBlank(matchedDescription)) {
                    LOG.warn("missing collectionId [" + guid + "] in file [" + resource + "] on line [" + parser.lastLineNumber() + "]");
                    matchedDescription = sourceCitation;
                    matchedKey = "";
                }

                final String citation = ExternalIdUtil.toCitation(null, sourceCitation + " " + matchedDescription, null);
                final Study study = nodeFactory.getOrCreateStudy(
                        new StudyImpl("bell-" + matchedKey, sourceCitation, "http://dx.doi.org/10.1654/4756.1", citation));

                // Parasite specimen: "<genus> <species>" with each part trimmed.
                final String[] nameParts = {
                        StringUtils.trim(parser.getValueByLabel("Genus")),
                        StringUtils.trim(parser.getValueByLabel("Species"))
                };
                final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(StringUtils.join(nameParts, " "), null));
                parasite.setExternalId(specimenUrl);
                final Location where = getLocation(parser, parasite);
                parasite.caughtIn(where);

                // Host specimen shares the location and external id of the parasite.
                final String hostName = StringUtils.trim(parser.getValueByLabel("SCIENTIFIC_NAME"));
                final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                host.caughtIn(where);
                host.setExternalId(specimenUrl);

                parasite.interactsWith(host, InteractType.PARASITE_OF);

                final Date when = parseDate(parser);
                nodeFactory.setUnixEpochProperty(parasite, when);
                nodeFactory.setUnixEpochProperty(host, when);
            }
        } catch (Throwable e) {
            throw new StudyImporterException(getErrorMessage(resource, parser), e);
        }
    }
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From class StudyImporterForByrnes, method importStudy.
@Override
public void importStudy() throws StudyImporterException {
LabeledCSVParser dataParser;
try {
dataParser = parserFactory.createParser(RESOURCE_PATH, CharsetConstant.UTF8);
} catch (IOException e) {
throw new StudyImporterException("failed to read resource [" + RESOURCE_PATH + "]", e);
}
Map<String, String> refMap = buildRefMap();
try {
while (dataParser.getLine() != null) {
if (importFilter.shouldImportRecord((long) dataParser.getLastLineNumber())) {
importLine(dataParser, refMap);
}
}
} catch (IOException e) {
throw new StudyImporterException("problem importing study at line [" + dataParser.lastLineNumber() + "]", e);
}
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From class StudyImporterForGoMexSI2Test, method importSinglePrey.
/**
 * Parses a one-row prey table and checks that the handler receives both the
 * generic "name" property and the prefixed "GOMEXSI:PREY_SOURCE_NAME" property,
 * each equal to "Crustacea".
 */
@Test
public void importSinglePrey() throws IOException, StudyImporterException {
    final String header = "DATA_ID,PRED_ID,PREY_SOURCE_NAME,PREY_DATABASE_NAME,PHYSIOLOG_STATE,SED_ORIGIN,PREY_PARTS,PREY_LIFE_HIST_STAGE,PREY_COND_INDEX,PREY_SEX,PREY_SEX_RATIO,PREY_LEN_TYPE,PREY_MIN_LEN,PREY_MAX_LEN,PREY_MN_LEN,PREY_MIN_WIDTH,PREY_MAX_WIDTH,PREY_MN_WIDTH,BIOMASS,BIOMASS_QUALIFIER,PCT_BIOMASS,PCT_BIOMASS_QUALIFIER,N_CONS,N_CONS_QUALIFIER,PCT_N_CONS,PCT_N_CONS_QUALIFIER,VOL_CONS,VOL_CONS_QUALIFIER,PCT_VOL_CONS,PCT_VOL_CONS_QUALIFIER,FREQ_OCC,FREQ_OCC_QUALIFIER,PCT_FREQ_OCC,PCT_FREQ_OCC_QUALIFIER,IRI,PCT_IRI,IRIa,IIR,E,PREY_NOTES,ENTRY_DATE,ENTRY_PERSON,EDITED_DATE,DATA_EDITOR,MODIFY_DATE,DATA_MODIFIER\n";
    final String record = "ACT_16R,Cchr.1,Crustacea,Crustacea,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.245,NA,0.15,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Jim Simons,NA,Jim Simons,27/06/2016,Theresa Mitchell\n";
    final LabeledCSVParser preyParser = new LabeledCSVParser(new CSVParser(new StringReader(header + record)));

    // Collects every property map the parser reports.
    final Map<String, String> captured = new HashMap<String, String>();
    final ParseEventHandler collector = new ParseEventHandler() {
        @Override
        public void onSpecimen(String predatorUID, Map<String, String> properties) {
            captured.putAll(properties);
        }
    };

    StudyImporterForGoMexSI2.parseSpecimen("test.txt", "PREY_", collector, preyParser);

    assertThat(captured.get("name"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_SOURCE_NAME"), is("Crustacea"));
}
Aggregations