Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From the class StudyImporterForGoMexSI2, method addSpecimen.
/**
 * Creates a CSV parser for {@code datafile} through {@code parserFactory} and
 * passes it, along with the remaining arguments, to {@code parseSpecimen}.
 *
 * @param datafile            resource handed to the parser factory; also quoted in the error message
 * @param scientificNameLabel forwarded unchanged to {@code parseSpecimen}
 * @param specimenListener    forwarded unchanged to {@code parseSpecimen}
 * @throws StudyImporterException wrapping any {@link IOException} raised inside the guarded call
 */
private void addSpecimen(String datafile, String scientificNameLabel, ParseEventHandler specimenListener) throws StudyImporterException {
    try {
        // The parser is created as the last argument, i.e. before parseSpecimen starts running.
        parseSpecimen(
                datafile,
                scientificNameLabel,
                specimenListener,
                parserFactory.createParser(datafile, CharsetConstant.UTF8));
    } catch (IOException ioe) {
        final String message = "failed to open resource [" + datafile + "]";
        throw new StudyImporterException(message, ioe);
    }
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From the class ParserFactoryForDatasetTest, method parserWithDatasetContextLocalResource.
/**
 * Checks that a parser created by a {@code ParserFactoryForDataset} backed by a
 * {@code DatasetLocal} can read a classpath resource: the first call to
 * {@code getLine()} must return the values "valueA" and "valueB".
 */
@Test
public void parserWithDatasetContextLocalResource() throws IOException {
    final LabeledCSVParser csvParser = new ParserFactoryForDataset(new DatasetLocal())
            .createParser("classpath:/org/eol/globi/data/someResource.csv", "UTF-8");

    final String[] firstLine = csvParser.getLine();

    assertThat(firstLine, is(new String[] { "valueA", "valueB" }));
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From the class StudyImporterForBioInfoTest, method importTaxa.
/**
 * Builds a taxon map from a small inline CSV sample (one header line followed by
 * nine quoted data rows) and checks two lookups by taxon id:
 * id "268" must resolve to the name "Chenopodiaceae" (the single quotes that
 * surround the name in the input are absent from the expected value), and
 * id "41886" must have no entry in the map.
 */
@Test
public void importTaxa() throws IOException {
    // One literal per CSV line; the concatenation yields the same text as a single long literal.
    final String csvSample =
            "my taxon id,rank,latin,authority,english,NBN Code,family,order,phylum,url\n"
            + "\"268\",\"Informal\",\"'Chenopodiaceae'\",\"\",\"the old Chenopodiaceae\",\"\",\"Amaranthaceae\",\"Caryophyllales\",\"Tracheophyta\",\"www.bioinfo.org.uk/html/t268.htm\"\n"
            + "\"162827\",\"Species\",\"Abacarus hystrix\",\"(Nalepa, 1896)\",\"a mite\",\"NHMSYS0020190380\",\"Eriophyidae\",\"Trombidiformes\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t162827.htm\"\n"
            + "\"41886\",\"Genus\",\"Abdera\",\"Stephens, 1832\",\"a genus of false darkling beetles\",\"NHMSYS0020151134\",\"Melandryidae\",\"Coleoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t41886.htm\"\n"
            + "\"34737\",\"Species\",\"Abdera biflexuosa\",\"(Curtis, 1829)\",\"a false darkling beetle\",\"NBNSYS0000024889\",\"Melandryidae\",\"Coleoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t34737.htm\"\n"
            + "\"34738\",\"Species\",\"Abdera flexuosa\",\"(Paykull, 1799)\",\"a false darkling beetle\",\"NBNSYS0000024890\",\"Melandryidae\",\"Coleoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t34738.htm\"\n"
            + "\"34739\",\"Species\",\"Abdera quadrifasciata\",\"(Curtis, 1829)\",\"a false darkling beetle\",\"NBNSYS0000024891\",\"Melandryidae\",\"Coleoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t34739.htm\"\n"
            + "\"34740\",\"Species\",\"Abdera triguttata\",\"(Gyllenhal, 1810)\",\"a false darkling beetle\",\"NBNSYS0000024892\",\"Melandryidae\",\"Coleoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t34740.htm\"\n"
            + "\"102829\",\"Species\",\"Abia sericea\",\"(Linnaeus, 1767)\",\"a clubhorned sawfly\",\"NHMSYS0020480647\",\"Cimbicidae\",\"Hymenoptera\",\"Arthropoda\",\"www.bioinfo.org.uk/html/t102829.htm\"\n"
            + "\"43913\",\"Genus\",\"Abies\",\"Mill.\",\"firs\",\"NHMSYS0000455511\",\"Pinaceae\",\"Pinales\",\"Tracheophyta\",\"www.bioinfo.org.uk/html/t43913.htm\"\n";

    final LabeledCSVParser taxaParser = createParser(csvSample);
    final Map<String, Taxon> taxaById = StudyImporterForBioInfo.buildTaxonMap(taxaParser);

    final Taxon informalTaxon = taxaById.get("268");
    assertThat(informalTaxon.getName(), is("Chenopodiaceae"));

    final Taxon genusTaxon = taxaById.get("41886");
    assertThat(genusTaxon, is(nullValue()));
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From the class TaxonCacheService, method initTaxonIdMap.
/**
 * Sets up {@code this.taxonLookupService}, backed by an index directory named
 * "lucene" inside the cache directory.
 * <p>
 * When the service is not temporary and the index directory already existed,
 * the existing index is reused as-is. Otherwise every line of
 * {@code taxonMapResource} is read and, for each line, the provided external id,
 * the provided name and the resolved name are each offered to
 * {@code addIfNeeded} together with the resolved external id.
 * <p>
 * The reader over {@code taxonMapResource} is closed once loading ends,
 * whether it completes normally or fails.
 *
 * @throws PropertyEnricherException wrapping any {@link IOException} raised while
 *                                   preparing or populating the index
 */
private void initTaxonIdMap() throws PropertyEnricherException {
    try {
        LOG.info("taxon lookup service instantiating...");
        File luceneDir = new File(getCacheDir().getAbsolutePath(), "lucene");
        // Sampled before createCacheDir runs, so it reflects the state prior to this call.
        boolean preexisting = luceneDir.exists();
        createCacheDir(luceneDir, isTemporary());
        TaxonLookupServiceImpl taxonLookupService = new TaxonLookupServiceImpl(new SimpleFSDirectory(luceneDir));
        taxonLookupService.setMaxHits(getMaxTaxonLinks());
        taxonLookupService.start();
        if (!isTemporary() && preexisting) {
            LOG.info("pre-existing taxon lookup index found, no need to re-index...");
        } else {
            LOG.info("no pre-existing taxon lookup index found, re-indexing...");
            int count = 0;
            LOG.info("taxon map loading [" + taxonMapResource + "] ...");
            StopWatch watch = new StopWatch();
            watch.start();
            // try-with-resources: the reader used to stay open after the loop (and on failure).
            try (BufferedReader reader = createBufferedReader(taxonMapResource)) {
                final LabeledCSVParser labeledCSVParser = CSVTSVUtil.createLabeledTSVParser(reader);
                while (labeledCSVParser.getLine() != null) {
                    Taxon provided = TaxonMapParser.parseProvidedTaxon(labeledCSVParser);
                    Taxon resolved = TaxonMapParser.parseResolvedTaxon(labeledCSVParser);
                    // All three keys point at the resolved external id.
                    addIfNeeded(taxonLookupService, provided.getExternalId(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, provided.getName(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, resolved.getName(), resolved.getExternalId());
                    count++;
                }
            }
            watch.stop();
            logCacheLoadStats(watch.getTime(), count);
            LOG.info("taxon map loading [" + taxonMapResource + "] done.");
        }
        taxonLookupService.finish();
        // Published only after finish() so the field never holds a half-initialized service.
        this.taxonLookupService = taxonLookupService;
        LOG.info("taxon lookup service instantiating done.");
    } catch (IOException e) {
        throw new PropertyEnricherException("problem initiating taxon cache index", e);
    }
}
Use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
From the class TaxonCacheService, method taxonCacheIterator.
/**
 * Returns an iterator over the taxa found in {@code resource}, read through
 * {@code CSVTSVUtil.createLabeledTSVParser}. Lines for which
 * {@code skipper.shouldSkipLine} returns true are not returned.
 * <p>
 * Each element pairs {@code valueOrNoMatch(externalId)} with the taxon rendered
 * as a map by {@code TaxonUtil.taxonToMap}.
 * <p>
 * An {@link IOException} raised while advancing is logged and reported as
 * "no more elements" rather than propagated. The underlying reader is closed
 * once the end of the input has been reached; an iterator that is abandoned
 * early keeps its reader open.
 *
 * @param resource location handed to {@code createBufferedReader}
 * @param skipper  decides per parsed line whether it is excluded from iteration
 * @return iterator over (id, taxon properties) pairs; {@code remove} is unsupported
 * @throws IOException if the reader or parser cannot be created
 */
public static Iterator<Fun.Tuple2<String, Map<String, String>>> taxonCacheIterator(final String resource, final LineSkipper skipper) throws IOException {
    return new Iterator<Fun.Tuple2<String, Map<String, String>>>() {
        private final BufferedReader reader = createBufferedReader(resource);
        private final LabeledCSVParser labeledCSVParser = CSVTSVUtil.createLabeledTSVParser(reader);
        // True while the parser is positioned on a line that has not been handed out by next() yet.
        private final AtomicBoolean lineReady = new AtomicBoolean(false);
        // Set once the end of input was seen, so the closed reader is never read again.
        private boolean exhausted = false;

        @Override
        public boolean hasNext() {
            if (exhausted) {
                return false;
            }
            try {
                boolean hasNext;
                do {
                    hasNext = lineReady.get() || consumeLine(labeledCSVParser);
                } while (hasNext && skipper.shouldSkipLine(labeledCSVParser));
                if (!hasNext) {
                    // End of input: release the reader that was previously left open.
                    exhausted = true;
                    reader.close();
                }
                return hasNext;
            } catch (IOException e) {
                LOG.error("failed to get next line", e);
                return false;
            }
        }

        // Advances the parser by one line; returns false at end of input.
        private boolean consumeLine(LabeledCSVParser labeledCSVParser) throws IOException {
            boolean hasNext = labeledCSVParser.getLine() != null;
            // Only consult the skipper when a line was actually read.
            lineReady.set(hasNext && !skipper.shouldSkipLine(labeledCSVParser));
            return hasNext;
        }

        @Override
        public Fun.Tuple2<String, Map<String, String>> next() {
            // Honour the Iterator contract: position on the next usable line, or fail loudly.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("no more taxon cache entries in [" + resource + "]");
            }
            final Taxon taxon = TaxonCacheParser.parseLine(labeledCSVParser);
            lineReady.set(false);
            return new Fun.Tuple2<>(valueOrNoMatch(taxon.getExternalId()), TaxonUtil.taxonToMap(taxon));
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }
    };
}
Aggregations