use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class TaxonCacheService method initTaxonCache.
/**
 * Initializes {@code resolvedIdToTaxonMap} from the "taxonCache" database.
 * <p>
 * If the database already holds a tree map named "taxonCacheById", that map is re-used as is.
 * Otherwise the map is built from {@code taxonCacheResource}, leaving out every line whose
 * parsed taxon has a blank path, and the load time and resulting size are logged.
 *
 * @throws PropertyEnricherException if an {@link IOException} occurs while building the cache
 */
private void initTaxonCache() throws PropertyEnricherException {
    final DB cacheDb = initDb("taxonCache");
    final String cacheName = "taxonCacheById";

    if (cacheDb.exists(cacheName)) {
        LOG.info("re-using pre-existing cache");
        resolvedIdToTaxonMap = cacheDb.getTreeMap(cacheName);
        return;
    }

    LOG.info("no pre-existing cache found, rebuilding...");
    LOG.info("taxon cache loading [" + taxonCacheResource + "]...");

    final StopWatch timer = new StopWatch();
    timer.start();

    // Lines whose parsed taxon carries no path are kept out of the cache.
    final LineSkipper blankPathSkipper = new LineSkipper() {
        @Override
        public boolean shouldSkipLine(LabeledCSVParser parser) {
            return StringUtils.isBlank(TaxonCacheParser.parseLine(parser).getPath());
        }
    };

    try {
        resolvedIdToTaxonMap = cacheDb
                .createTreeMap(cacheName)
                .pumpPresort(100000)
                .pumpIgnoreDuplicates()
                .pumpSource(taxonCacheIterator(taxonCacheResource, blankPathSkipper))
                .keySerializer(BTreeKeySerializer.STRING)
                .make();
    } catch (IOException ex) {
        throw new PropertyEnricherException("failed to instantiate taxonCache: [" + ex.getMessage() + "]", ex);
    }

    timer.stop();
    LOG.info("taxon cache loading [" + taxonCacheResource + "] done.");
    logCacheLoadStats(timer.getTime(), resolvedIdToTaxonMap.size());
    timer.reset();
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForCruaud method importStudy.
/**
 * Imports parasite-host interactions from {@code RESOURCE_PATH} into the "cruaud" study.
 * <p>
 * For every record accepted by {@code importFilter}, the parasite name ("Family and Species")
 * and host name ("Natural host Ficus species", with "F." replaced by "Ficus") are read. When
 * {@code areNamesAvailable} accepts both names, a {@code PARASITE_OF} interaction is created and,
 * if the geo names service resolves the "Sampling location", both specimens are attached to
 * that location. Unresolvable localities are logged as warnings.
 *
 * @throws StudyImporterException if the resource cannot be read or a line fails to import
 */
@Override
public void importStudy() throws StudyImporterException {
    LabeledCSVParser records;
    try {
        records = parserFactory.createParser(RESOURCE_PATH, CharsetConstant.UTF8);
    } catch (IOException e) {
        throw new StudyImporterException("failed to read resource [" + RESOURCE_PATH + "]", e);
    }

    try {
        final Study study = nodeFactory.getOrCreateStudy(new StudyImpl("cruaud", SOURCE, "http://dx.doi.org/10.1093/sysbio/sys068", null));
        while (records.getLine() != null) {
            if (!importFilter.shouldImportRecord((long) records.getLastLineNumber())) {
                continue;
            }
            try {
                final String parasiteName = StringUtils.trim(records.getValueByLabel("Family and Species"));
                final String abbreviatedHostName = StringUtils.trim(records.getValueByLabel("Natural host Ficus species"));
                final String hostName = StringUtils.replace(abbreviatedHostName, "F.", "Ficus");
                if (areNamesAvailable(parasiteName, hostName)) {
                    final Specimen parasite = nodeFactory.createSpecimen(study, new TaxonImpl(parasiteName, null));
                    final Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(hostName, null));
                    parasite.interactsWith(host, InteractType.PARASITE_OF);

                    final String locality = StringUtils.trim(records.getValueByLabel("Sampling location"));
                    // A locality unknown to the service and a known locality without coordinates
                    // both end up as a null point and produce the same warning.
                    final LatLng point = getGeoNamesService().hasTermForLocale(locality)
                            ? getGeoNamesService().findLatLng(locality)
                            : null;
                    if (point == null) {
                        LOG.warn("no location associated with locality [" + locality + "]");
                    } else {
                        final Location location = nodeFactory.getOrCreateLocation(new LocationImpl(point.getLat(), point.getLng(), null, null));
                        parasite.caughtIn(location);
                        host.caughtIn(location);
                    }
                }
            } catch (NodeFactoryException | NumberFormatException e) {
                throw new StudyImporterException("failed to import line [" + (records.lastLineNumber() + 1) + "]", e);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("problem importing [" + RESOURCE_PATH + "]", e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForDunne method importStudy.
/**
 * Imports a study from a nodes resource and a links resource.
 * <p>
 * First, every row of the nodes resource with a non-null node id is turned into a taxon
 * (name from "Name", external id from the ITIS prefix plus "TSN") keyed by that node id.
 * Then, for every row of the links resource, a location is looked up or created from
 * {@code getLocation()} and the row is handed to {@code addLink}. Rows are only passed to
 * {@code addLink} when a non-null location is available.
 *
 * @throws StudyImporterException if a resource cannot be read or nodes cannot be created
 */
@Override
public void importStudy() throws StudyImporterException {
    final Study study = createStudy();
    try {
        final LabeledCSVParser nodeParser = parserFactory.createParser(getNodesResourceName(), CharsetConstant.UTF8);
        nodeParser.changeDelimiter(getDelimiter());

        final Map<Integer, Taxon> taxonByNodeId = new HashMap<>();
        while (nodeParser.getLine() != null) {
            final Integer nodeId = getNodeId(nodeParser);
            if (nodeId == null) {
                continue;
            }
            final String tsn = nodeParser.getValueByLabel("TSN");
            final String name = nodeParser.getValueByLabel("Name");
            taxonByNodeId.put(nodeId, new TaxonImpl(name, TaxonomyProvider.ID_PREFIX_ITIS + tsn));
        }

        final LabeledCSVParser linkParser = parserFactory.createParser(getLinksResourceName(), CharsetConstant.UTF8);
        linkParser.changeDelimiter(getDelimiter());
        while (linkParser.getLine() != null) {
            // The location is resolved anew for each link row; it stays null when no
            // location is configured or none could be obtained from the node factory.
            Location linkLocation = null;
            if (getLocation() != null) {
                linkLocation = nodeFactory.getOrCreateLocation(new LocationImpl(getLocation().getLat(), getLocation().getLng(), null, null));
            }
            if (linkLocation != null) {
                addLink(study, taxonByNodeId, linkParser, linkLocation);
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to find data file(s)", e);
    } catch (NodeFactoryException e) {
        throw new StudyImporterException("failed to create nodes", e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForGlobalWebDb method parseDietMatrix.
/**
 * Parses a diet matrix that is preceded by a citation line and reports every positive
 * matrix cell to the listener as an "ate" interaction.
 * <p>
 * The input is split on {@code "\r\n"}. The first row is the citation (a leading quote and a
 * trailing quote followed by commas are stripped). The remaining rows form a CSV matrix
 * whose first header cell is split on "-" into a habitat (first part) and a locality (the
 * remaining parts, trimmed and joined with ", "). The other header cells are source taxon names,
 * and the first cell of each data row is the target taxon name. A link is emitted for each
 * cell that consists of digits only and is greater than zero.
 * <p>
 * Nothing is reported when the input has no rows, the matrix has no header, or the header
 * has fewer than two columns.
 *
 * @param listener               receives one property map per positive matrix cell
 * @param dietMatrixWithCitation citation line followed by the CSV diet matrix, separated by "\r\n"
 * @param sourceCitation         citation of the data source, copied into every link
 * @throws IOException            if the matrix cannot be read
 * @throws StudyImporterException declared for failures raised while reporting links
 */
static void parseDietMatrix(InteractionListener listener, String dietMatrixWithCitation, String sourceCitation) throws IOException, StudyImporterException {
    String[] rows = dietMatrixWithCitation.split("\r\n");
    if (rows.length == 0) {
        return;
    }

    String citation = rows[0].replaceAll("^\"", "").replaceAll("\",*$", "");
    List<String> matrixRows = Arrays.asList(rows).subList(1, rows.length);
    String matrix = org.apache.commons.lang.StringUtils.join(matrixRows, "\n");
    // NOTE(review): toInputStream(String) encodes with the platform default charset; confirm
    // that this matches the charset CSVTSVUtil decodes with.
    LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(IOUtils.toInputStream(matrix));

    String[] headerColumns = parser.getLabels();
    // Guard against a missing header (e.g. input with a citation line only).
    if (headerColumns == null || headerColumns.length <= 1) {
        return;
    }

    // "-".split("-") yields an empty array, so the habitat falls back to an empty string
    // and skip(1) leaves the locality empty instead of failing.
    String[] habitatAndLocality = headerColumns[0].split("-");
    String habitat = habitatAndLocality.length > 0 ? habitatAndLocality[0] : "";
    String locality = Arrays.stream(habitatAndLocality)
            .skip(1)
            .map(String::trim)
            .collect(Collectors.joining(", "));

    Map<String, String> props = new TreeMap<>();
    props.put(StudyImporterForTSV.HABITAT_NAME, org.apache.commons.lang.StringUtils.trim(habitat));
    props.put(StudyImporterForTSV.LOCALITY_NAME, org.apache.commons.lang.StringUtils.trim(locality));
    props.put(StudyImporterForTSV.INTERACTION_TYPE_NAME, InteractType.ATE.getLabel());
    props.put(StudyImporterForTSV.INTERACTION_TYPE_ID, InteractType.ATE.getIRI());
    props.put(StudyImporterForTSV.REFERENCE_ID, MD5.getHashString(citation));
    props.put(StudyImporterForTSV.REFERENCE_CITATION, citation);
    props.put(StudyImporterForTSV.STUDY_SOURCE_CITATION, sourceCitation);

    List<String> sourceTaxa = Arrays.asList(headerColumns).subList(1, headerColumns.length);
    while (parser.getLine() != null) {
        // The target taxon is the same for every column of the current row.
        String targetTaxon = org.apache.commons.lang.StringUtils.trim(parser.getValueByLabel(headerColumns[0]));
        for (String sourceTaxon : sourceTaxa) {
            if (isPositiveCount(parser.getValueByLabel(sourceTaxon))) {
                TreeMap<String, String> link = new TreeMap<>(props);
                link.put(StudyImporterForTSV.SOURCE_TAXON_NAME, org.apache.commons.lang.StringUtils.trim(sourceTaxon));
                link.put(StudyImporterForTSV.TARGET_TAXON_NAME, targetTaxon);
                listener.newLink(link);
            }
        }
    }
}

/**
 * Tells whether a matrix cell holds a positive whole number.
 * <p>
 * Works on the digits directly rather than via {@code Integer.parseInt}, so that digit
 * strings beyond the {@code int} range are accepted instead of raising a
 * {@link NumberFormatException}.
 *
 * @param value cell value, may be {@code null}
 * @return {@code true} if the value consists of digits only and at least one digit is non-zero
 */
private static boolean isPositiveCount(String value) {
    if (!NumberUtils.isDigits(value)) {
        return false;
    }
    for (int i = 0; i < value.length(); i++) {
        if (Character.digit(value.charAt(i), 10) > 0) {
            return true;
        }
    }
    return false;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForGoMexSI2 method addReferences.
/**
 * Adds a study for every reference whose id is not yet mapped to a study.
 * <p>
 * The references resource is parsed twice: a first parser is handed to
 * {@code collectContributors} to obtain contributors by reference id, and a second parser is
 * used to walk the rows. For each row, the mandatory "DATA_ID" value is looked up in
 * {@code referenceIdToStudy}; when no study is mapped, {@code addNewStudy} is called with the
 * contributor found for that id.
 *
 * @param referenceIdToStudy studies keyed by reference id; passed on to {@code addNewStudy}
 * @throws StudyImporterException if the references resource cannot be opened or read
 */
protected void addReferences(Map<String, Study> referenceIdToStudy) throws StudyImporterException {
    final String resource = getReferencesResourcePath();
    try {
        final LabeledCSVParser contributorParser = parserFactory.createParser(resource, CharsetConstant.UTF8);
        final Map<String, String> contributorByRefId = collectContributors(resource, contributorParser);

        // A fresh parser is created so the rows can be read again from the start.
        final LabeledCSVParser referenceParser = parserFactory.createParser(resource, CharsetConstant.UTF8);
        while (referenceParser.getLine() != null) {
            final String refId = getMandatoryValue(resource, referenceParser, "DATA_ID");
            if (referenceIdToStudy.get(refId) == null) {
                addNewStudy(referenceIdToStudy, resource, referenceParser, refId, contributorByRefId.get(refId));
            }
        }
    } catch (IOException e) {
        throw new StudyImporterException("failed to open resource [" + resource + "]", e);
    }
}
Aggregations