use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForGemina method importStudy.
/**
 * Imports pathogen/host interactions from the tab-delimited GeMInA resource.
 *
 * <p>A single study is created (or reused) for the GeMInA publication. Every parsed line with
 * more than seven columns yields a pathogen specimen and a host/reservoir specimen, linked by a
 * {@code PATHOGEN_OF} interaction. Non-blank taxonomy ids are prefixed with the NCBI id prefix;
 * blank ids result in a {@code null} external id.
 *
 * @throws StudyImporterException if reading the resource or creating nodes fails
 */
@Override
public void importStudy() throws StudyImporterException {
    String studyResource = "gemina_search_2008-01-03.txt";
    try {
        String source = "Schriml, L. M., Arze, C., Nadendla, S., Ganapathy, A., Felix, V., Mahurkar, A., … Hall, N. (2009). GeMInA, Genomic Metadata for Infectious Agents, a geospatial surveillance pathogen database. Nucleic Acids Research, 38(Database), D754–D764. doi:10.1093/nar/gkp832";
        Study study = nodeFactory.getOrCreateStudy(new StudyImpl(source, source, "doi:10.1093/nar/gkp832", source));
        LabeledCSVParser parser = parserFactory.createParser(studyResource, "UTF-8");
        try {
            parser.changeDelimiter('\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                // Shorter lines do not reach the host id column (index 7) and are skipped.
                if (line.length > 7) {
                    String pathogenId = parser.getValueByLabel("Pathogen Taxonomy");
                    String pathogenExternalId = StringUtils.isBlank(pathogenId) ? null : TaxonomyProvider.NCBI.getIdPrefix() + pathogenId;
                    Specimen pathogen = nodeFactory.createSpecimen(study, new TaxonImpl(parser.getValueByLabel("Pathogen"), pathogenExternalId));

                    // The host taxonomy id is read by position rather than by label.
                    String hostId = line[7];
                    String hostReservoirExternalId = StringUtils.isBlank(hostId) ? null : TaxonomyProvider.NCBI.getIdPrefix() + hostId;
                    Specimen host = nodeFactory.createSpecimen(study, new TaxonImpl(parser.getValueByLabel("Host/Reservoir"), hostReservoirExternalId));

                    pathogen.interactsWith(host, InteractType.PATHOGEN_OF);
                }
            }
        } finally {
            // Release the underlying stream whether or not the import succeeded.
            parser.close();
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException("failed to import [" + studyResource + "]", e);
    }
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class CSVTSVUtil method createParser.
/**
 * Builds a labeled CSV parser over the contents of {@code tmpFile}.
 *
 * <p>The zip stream and file are first handed to {@code streamToFile} (presumably writing the
 * stream contents into the file - confirm against that helper). The file is then opened through
 * {@code FileUtils.getUncompressedBufferedReader} using the "UTF-8" encoding.
 *
 * @param tmpFile file that is passed to {@code streamToFile} and then read back
 * @param zis zip stream passed to {@code streamToFile}
 * @return parser created by {@code createLabeledCSVParser} over the file reader
 * @throws IOException if streaming to, or reading from, the file fails
 */
public static LabeledCSVParser createParser(File tmpFile, ZipInputStream zis) throws IOException {
    streamToFile(tmpFile, zis);
    FileInputStream fileStream = new FileInputStream(tmpFile);
    Reader utf8Reader = FileUtils.getUncompressedBufferedReader(fileStream, "UTF-8");
    return createLabeledCSVParser(utf8Reader);
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForBioInfoTest method importReferences.
/**
 * Feeds a small BioInfo references sample (header plus ten rows) to
 * {@code StudyImporterForBioInfo.buildRefMap} and checks the citation text produced for
 * a selection of reference ids.
 */
@Test
public void importReferences() throws IOException {
    // One CSV record per line; the first line holds the column labels.
    String referencesCsv =
            "BioInfo reference id,BioInfo url,author,year,title,reference type,edition,BioInfo reference id of the source (journal/book/publisher etc),source author,source title,source journal short title,source year,source reference type,source ISSN/ISBN,volume,series,page range,no of pages,ISSN/ISBN,URL of online source\n"
            + "\"149326\",\"www.bioinfo.org.uk/html/b149326.htm\",\"\",\"\",\"Agrobacterium tumefaciens\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://en.wikipedia.org/Agrobacterium_tumefaciens\"\n"
            + "\"147341\",\"www.bioinfo.org.uk/html/b147341.htm\",\"\",\"\",\"www.seabean.com\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.seabean.com\"\n"
            + "\"148459\",\"www.bioinfo.org.uk/html/b148459.htm\",\"\",\"\",\"British Leafminers\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.leafmines.co.uk/\"\n"
            + "\"148671\",\"www.bioinfo.org.uk/html/b148671.htm\",\"\",\"\",\"Sawflies discussion group\",\"E-forum\",\"\",\"148672\",\"\",\"Yahoo\",\"\",\"\",\"Publisher\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://tech.groups.yahoo.com/group/sawfly/join\"\n"
            + "\"149380\",\"www.bioinfo.org.uk/html/b149380.htm\",\"\",\"\",\"Cuttlefish\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.pznow.co.uk/marine/cuttlefish.html\"\n"
            + "\"149878\",\"www.bioinfo.org.uk/html/b149878.htm\",\"\",\"\",\"The Marine Life Information Network for Britain and Ireland (MarLIN)\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.marlin.ac.uk\"\n"
            + "\"150118\",\"www.bioinfo.org.uk/html/b150118.htm\",\"\",\"2008\",\"Bacterial bleeding canker of horse chestnut\",\"Paper\",\"\",\"150094\",\"FERA\",\"Plant Clinic News\",\"\",\"\",\"Journal\",\"\",\"May 08\",\"\",\"2\",\"1\",\"\",\"\"\n"
            + "\"150071\",\"www.bioinfo.org.uk/html/b150071.htm\",\"\",\"\",\"Pyrenopeziza brassicae - CropMonitor\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.cropmonitor.co.uk/wosr/encyclopaedia/view_icard.cfm?cslref=12680\"\n"
            + "\"60527\",\"www.bioinfo.org.uk/html/b60527.htm\",\"Bullock, J.A.\",\"1992\",\"Host Plants of British Beetles: A List of Recorded Associations\",\"Book/Report\",\"\",\"147501\",\"\",\"Amateur Entomologists' Society\",\"AES\",\"\",\"Publisher\",\"\",\"11a\",\"\",\"\",\"24\",\"0 900054 56 5\",\"\"\n"
            + "\"150095\",\"www.bioinfo.org.uk/html/b150095.htm\",\"\",\"2009\",\"Verbena downy mildew\",\"Paper\",\"\",\"150094\",\"FERA\",\"Plant Clinic News\",\"\",\"\",\"Journal\",\"\",\"Sept 09\",\"\",\"1\",\"\",\"\",\"\"\n";

    final LabeledCSVParser referenceParser = createParser(referencesCsv);
    Map<String, String> citationByRefId = StudyImporterForBioInfo.buildRefMap(referenceParser);

    // Web pages: title followed by the access URL.
    assertThat(citationByRefId.get("149326"),
            is("Agrobacterium tumefaciens. Accessed at: http://en.wikipedia.org/Agrobacterium_tumefaciens"));
    assertThat(citationByRefId.get("149878"),
            is("The Marine Life Information Network for Britain and Ireland (MarLIN). Accessed at: http://www.marlin.ac.uk"));

    // Papers: title, source title, year, volume and page range.
    assertThat(citationByRefId.get("150118"),
            is("Bacterial bleeding canker of horse chestnut. Plant Clinic News. 2008. Vol May 08. pp 2"));
    assertThat(citationByRefId.get("150095"),
            is("Verbena downy mildew. Plant Clinic News. 2009. Vol Sept 09. pp 1"));

    // Book/report with an author.
    assertThat(citationByRefId.get("60527"),
            is("Bullock, J.A.. 1992. Host Plants of British Beetles: A List of Recorded Associations. Amateur Entomologists' Society. Vol 11a"));
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForBlewettTest method importLines.
/**
 * Runs {@code StudyImporterForBlewett} against two in-memory CSV fixtures (a predator/prey
 * table and a date/location table) and verifies the imported specimens, their diet
 * relationships, collection time and location.
 */
@Test
public void importLines() throws StudyImporterException, NodeFactoryException {
    // Predator/prey table: one record per line, first line holds the column labels.
    String predatorPreyMapping =
            "\"Collection #\",\"Sp#\",\"Standard Length\",\"ID\",\"Far duoraum\",\"Cal sapidus\",\"Unid fish\",\"Anchoa spp\",\"Mug gyrans\",\"Bai chrysoura\",\"Portunus spp\",\"Bivalves\",\"Portunidae\",\"Lag rhomboides\",\"Xanthidae\",\"Palaemonidae\",\"Eucinostomus spp\",\"Mugil spp\",\"Alpheidae\",\"Atherinidae\",\"Syn foetens\",\"Ort chrysoptera\",\"Snails\",\"Euc gula\",\"Cynoscion spp\",\"Cyp. Variegatus\",\"Fun majalis\",\"Poe latipinna\",\"Unid crab\",\"Har jaguana\",\"Arm mierii\",\"Fun grandis\",\"Mic gulosus\",\"Ari felis\",\"Clupeidae\",\"Fundulus spp\",\"Diapterus/Eugerres spp\",\"Isopods\",\"Cyn nebulosus\",\"Opi oglinum\",\"Flo carpio\",\"Luc parva\",\"Uca spp\",\"Majidae\",\"Mug cephalus\",\"Squ empusa\",\"Opi robinsi\",\"Ariidae\",\"Sci ocellatus\",\"Unid shrimp\",\"Uca thayeri\",\"Grapsidae\",\"Lei xanthurus\",\"Elo saurus\",\"Brevoortia spp\"\n"
            + "\"CHD01101502\",1,549,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHD01102504\",1,548,\"E\",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHD01102504\",2,550,,3,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHM000152\",1,580,\"E\",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHM000152\",2,556,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,";
    // Date/location table keyed by the same "Collection #" label.
    String dateLocationString =
            "\"Collection #\",\"Longitude\",\"Latitude\",\"Time\",\"Date\",\"Temperature\",\"Salinity\"\n"
            + "\"CHD01101502\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHD01102504\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHM000151\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHM000152\",-82.103833,26.651833,12:40:00,1-Mar-00,24.8,30.3\n"
            + "\"CHM000153\",-82.087333,26.644833,13:40:00,1-Mar-00,25.1,30.1\n"
            + "\"CHM000154\",-82.083167,26.671167,14:40:00,1-Mar-00,26,30.4\n"
            + "\"CHM000175\",-82.197833,26.688167,10:00:00,8-Mar-00,22.2,35.05\n"
            + "\"CHM000176\",-82.191333,26.667333,11:00:00,8-Mar-00,22.7,35.25";

    final TestParserFactory preyPredatorFactory = new TestParserFactory(predatorPreyMapping);
    final TestParserFactory dateLocationFactory = new TestParserFactory(dateLocationString);
    // Route resources whose name contains "abundance" to the predator/prey fixture,
    // everything else to the date/location fixture.
    ParserFactory testFactory = new ParserFactory() {
        @Override
        public LabeledCSVParser createParser(String studyResource, String characterEncoding) throws IOException {
            TestParserFactory delegate = studyResource.contains("abundance")
                    ? preyPredatorFactory
                    : dateLocationFactory;
            return delegate.createParser(studyResource, characterEncoding);
        }
    };

    StudyImporter importer = new StudyImporterTestFactory(testFactory, nodeFactory)
            .instantiateImporter((Class) StudyImporterForBlewett.class);
    importStudy(importer);

    Study study = getStudySingleton(getGraphDb());
    Iterable<Relationship> collectedRels = NodeUtil.getSpecimens(study);

    // First collected specimen: a predator with a known collection time, life stage and length.
    Relationship firstCollected = collectedRels.iterator().next();
    Node firstPredator = firstCollected.getEndNode();
    Date collectedAt = nodeFactory.getUnixEpochProperty(new SpecimenNode(firstPredator));
    assertThat(collectedAt, is(not(nullValue())));
    assertThat(dateToString(collectedAt), is("2000-03-01T10:55:00.000-06:00"));
    assertThat((String) firstPredator.getProperty(SpecimenConstant.LIFE_STAGE_LABEL), is("post-juvenile adult stage"));
    assertThat((String) firstPredator.getProperty(SpecimenConstant.LIFE_STAGE_ID), is("UBERON:0000113"));
    assertThat((Double) firstPredator.getProperty(SpecimenConstant.LENGTH_IN_MM), is(549.0));

    Node predatorTaxon = firstPredator
            .getRelationships(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING)
            .iterator().next().getEndNode();
    assertThat((String) predatorTaxon.getProperty(PropertyAndValueDictionary.NAME), is("Centropomus undecimalis"));

    // Its first prey is classified as the "Lag rhomboides" column.
    Iterable<Relationship> firstDiet = firstPredator.getRelationships(NodeUtil.asNeo4j(InteractType.ATE), Direction.OUTGOING);
    Node prey = firstDiet.iterator().next().getEndNode();
    assertThat(prey, is(not(nullValue())));
    Node preyTaxon = prey
            .getRelationships(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING)
            .iterator().next().getEndNode();
    assertThat(preyTaxon, is(not(nullValue())));
    assertThat((String) preyTaxon.getProperty(PropertyAndValueDictionary.NAME), is("Lag rhomboides"));

    // Second collected specimen: a predator without any recorded prey.
    Iterator<Relationship> collectedIterator = collectedRels.iterator();
    collectedIterator.next();
    Relationship secondCollected = collectedIterator.next();
    Node secondPredator = secondCollected.getEndNode();
    assertThat((Double) secondPredator.getProperty(SpecimenConstant.LENGTH_IN_MM), is(548.0));
    Iterable<Relationship> secondDiet = secondPredator.getRelationships(NodeUtil.asNeo4j(InteractType.ATE), Direction.OUTGOING);
    assertThat(secondDiet.iterator().hasNext(), is(false));

    // Exactly three specimens are expected at the CHM000152 coordinates.
    Location location = nodeFactory.findLocation(new LocationImpl(26.651833, -82.103833, 0.0, null));
    assertThat(location, is(not(nullValue())));
    Iterator<Relationship> caughtHere = NodeUtil.getSpecimenCaughtHere(location).iterator();
    for (int expected = 0; expected < 3; expected++) {
        assertThat(caughtHere.hasNext(), is(true));
        caughtHere.next();
    }
    assertThat(caughtHere.hasNext(), is(false));
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForBioInfoTest method parseSomeRelations.
/**
 * Imports the relations fixture through {@code StudyImporterForBioInfo.createRelations} and
 * verifies the resulting studies, their citations, and that every specimen of study
 * "ref:60527" is classified and takes part in interactions in both directions.
 */
@Test
public void parseSomeRelations() throws IOException, StudyImporterException {
    assertThat(taxonIndex.findTaxonByName("Homo sapiens"), is(nullValue()));

    LabeledCSVParser labeledCSVParser = createParser(RELATIONS_STRING);
    StudyImporterForBioInfo importer = new StudyImporterForBioInfo(new ParserFactoryLocal(), nodeFactory);

    // Plain map rather than double-brace initialization, which creates an anonymous subclass.
    HashMap<String, String> citationByRefId = new HashMap<>();
    citationByRefId.put("60527", "citation A");
    citationByRefId.put("60536", "citation B");
    importer.createRelations(labeledCSVParser, citationByRefId, new HashMap<>());
    resolveNames();

    Study study = nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:60536");
    assertNotNull(study);
    assertThat(study.getExternalId(), is("http://bioinfo.org.uk/html/b60536.htm"));
    assertNull(nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:bla"));

    Study study1 = nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:60527");
    // Null check comes first so a missing study fails as an assertion, not a NullPointerException.
    assertThat(study1, is(notNullValue()));
    assertThat(study1.getCitation(), is("citation A"));

    Iterable<Relationship> specimens = NodeUtil.getSpecimens(study1);
    List<Node> specimenList = new ArrayList<>();
    for (Relationship specimen : specimens) {
        Node specimenNode = specimen.getEndNode();
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING), is(notNullValue()));
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(InteractType.INTERACTS_WITH), Direction.OUTGOING), is(notNullValue()));
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(InteractType.INTERACTS_WITH), Direction.INCOMING), is(notNullValue()));
        specimenList.add(specimenNode);
    }
    assertThat(specimenList.size(), is(16));

    Relationship classifiedAs = specimenList.get(0).getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING);
    assertThat(classifiedAs, is(notNullValue()));
    assertThat((String) classifiedAs.getEndNode().getProperty(PropertyAndValueDictionary.EXTERNAL_ID), is("NBN:NBNSYS0000003949"));
    assertThat(specimenList.get(1).getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING), is(notNullValue()));

    assertThat(taxonIndex.findTaxonById(TaxonomyProvider.NBN.getIdPrefix() + "NBNSYS0000024889"), is(notNullValue()));
    assertThat(taxonIndex.findTaxonById(TaxonomyProvider.NBN.getIdPrefix() + "NBNSYS0000024891"), is(notNullValue()));
}
Aggregations