Search in sources :

Example 61 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForGemina method importStudy.

/**
 * Imports pathogen/host interactions from the tab-delimited resource
 * {@code gemina_search_2008-01-03.txt}.
 *
 * <p>Every row that has more than seven columns yields two specimens - a pathogen and a
 * host - linked by {@link InteractType#PATHOGEN_OF}. Rows with seven or fewer columns
 * are skipped.
 *
 * @throws StudyImporterException if reading the resource or creating nodes fails; the
 *         underlying {@link IOException} or {@code NodeFactoryException} is kept as the cause
 */
@Override
public void importStudy() throws StudyImporterException {
    String studyResource = "gemina_search_2008-01-03.txt";
    try {
        // The full citation doubles as the study title, source and citation.
        String source = "Schriml, L. M., Arze, C., Nadendla, S., Ganapathy, A., Felix, V., Mahurkar, A., … Hall, N. (2009). GeMInA, Genomic Metadata for Infectious Agents, a geospatial surveillance pathogen database. Nucleic Acids Research, 38(Database), D754–D764. doi:10.1093/nar/gkp832";
        StudyImpl studyTemplate = new StudyImpl(source, source, "doi:10.1093/nar/gkp832", source);
        Study study = nodeFactory.getOrCreateStudy(studyTemplate);

        LabeledCSVParser parser = parserFactory.createParser(studyResource, "UTF-8");
        parser.changeDelimiter('\t');

        for (String[] row = parser.getLine(); row != null; row = parser.getLine()) {
            if (row.length <= 7) {
                // Too short to carry the host id column; ignore the row.
                continue;
            }

            String pathogenExternalId = prefixWithNcbi(parser.getValueByLabel("Pathogen Taxonomy"));
            TaxonImpl pathogenTaxon = new TaxonImpl(parser.getValueByLabel("Pathogen"), pathogenExternalId);
            Specimen pathogen = nodeFactory.createSpecimen(study, pathogenTaxon);

            // The host id is read by position (column index 7) rather than by label.
            String hostReservoirExternalId = prefixWithNcbi(row[7]);
            TaxonImpl hostTaxon = new TaxonImpl(parser.getValueByLabel("Host/Reservoir"), hostReservoirExternalId);
            Specimen host = nodeFactory.createSpecimen(study, hostTaxon);

            pathogen.interactsWith(host, InteractType.PATHOGEN_OF);
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException("failed to import [" + studyResource + "]", e);
    }
}

/**
 * Turns a raw taxonomy id into an NCBI-prefixed external id.
 *
 * @param rawId id as read from the data file; may be {@code null} or blank
 * @return the NCBI id prefix followed by {@code rawId}, or {@code null} when {@code rawId} is blank
 */
private static String prefixWithNcbi(String rawId) {
    if (StringUtils.isBlank(rawId)) {
        return null;
    }
    return TaxonomyProvider.NCBI.getIdPrefix() + rawId;
}
Also used : Study(org.eol.globi.domain.Study) Specimen(org.eol.globi.domain.Specimen) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException)

Example 62 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class CSVTSVUtil method createParser.

/**
 * Spools {@code zis} into {@code tmpFile} via {@code streamToFile} and opens a labeled
 * CSV parser on that file, read as UTF-8.
 *
 * <p>On success the returned parser reads from a stream that is still open on
 * {@code tmpFile}. If building the reader or the parser fails, the file stream opened
 * here is closed before the failure propagates, so no file handle is leaked.
 *
 * @param tmpFile file that receives the streamed content and is then parsed
 * @param zis     stream whose content is handed to {@code streamToFile}
 *                (presumably positioned at the entry to read - confirm against callers)
 * @return a parser over the content of {@code tmpFile}
 * @throws IOException if spooling to the file, opening it, or creating the reader/parser fails
 */
public static LabeledCSVParser createParser(File tmpFile, ZipInputStream zis) throws IOException {
    streamToFile(tmpFile, zis);
    FileInputStream fileStream = new FileInputStream(tmpFile);
    boolean parserCreated = false;
    try {
        Reader reader = FileUtils.getUncompressedBufferedReader(fileStream, "UTF-8");
        LabeledCSVParser dietParser = createLabeledCSVParser(reader);
        parserCreated = true;
        return dietParser;
    } finally {
        if (!parserCreated) {
            // Nothing downstream took ownership of the stream, so release it here.
            try {
                fileStream.close();
            } catch (IOException ignored) {
                // Deliberately dropped: the original failure is the one worth reporting.
            }
        }
    }
}
Also used : Reader(java.io.Reader) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) FileInputStream(java.io.FileInputStream)

Example 63 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForBioInfoTest method importReferences.

/**
 * Feeds a small excerpt of the BioInfo references table (a header plus ten records)
 * to {@code StudyImporterForBioInfo.buildRefMap} and checks the citation string
 * produced for a handful of reference ids.
 */
@Test
public void importReferences() throws IOException {
    // One CSV record per line: the header first, then the reference rows.
    String referenceCsv =
            "BioInfo reference id,BioInfo url,author,year,title,reference type,edition,BioInfo reference id of the source (journal/book/publisher etc),source author,source title,source journal short title,source year,source reference type,source ISSN/ISBN,volume,series,page range,no of pages,ISSN/ISBN,URL of online source\n"
            + "\"149326\",\"www.bioinfo.org.uk/html/b149326.htm\",\"\",\"\",\"Agrobacterium tumefaciens\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://en.wikipedia.org/Agrobacterium_tumefaciens\"\n"
            + "\"147341\",\"www.bioinfo.org.uk/html/b147341.htm\",\"\",\"\",\"www.seabean.com\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.seabean.com\"\n"
            + "\"148459\",\"www.bioinfo.org.uk/html/b148459.htm\",\"\",\"\",\"British Leafminers\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.leafmines.co.uk/\"\n"
            + "\"148671\",\"www.bioinfo.org.uk/html/b148671.htm\",\"\",\"\",\"Sawflies discussion group\",\"E-forum\",\"\",\"148672\",\"\",\"Yahoo\",\"\",\"\",\"Publisher\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://tech.groups.yahoo.com/group/sawfly/join\"\n"
            + "\"149380\",\"www.bioinfo.org.uk/html/b149380.htm\",\"\",\"\",\"Cuttlefish\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.pznow.co.uk/marine/cuttlefish.html\"\n"
            + "\"149878\",\"www.bioinfo.org.uk/html/b149878.htm\",\"\",\"\",\"The Marine Life Information Network for Britain and Ireland (MarLIN)\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.marlin.ac.uk\"\n"
            + "\"150118\",\"www.bioinfo.org.uk/html/b150118.htm\",\"\",\"2008\",\"Bacterial bleeding canker of horse chestnut\",\"Paper\",\"\",\"150094\",\"FERA\",\"Plant Clinic News\",\"\",\"\",\"Journal\",\"\",\"May 08\",\"\",\"2\",\"1\",\"\",\"\"\n"
            + "\"150071\",\"www.bioinfo.org.uk/html/b150071.htm\",\"\",\"\",\"Pyrenopeziza brassicae - CropMonitor\",\"Web Site/Page\",\"\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"http://www.cropmonitor.co.uk/wosr/encyclopaedia/view_icard.cfm?cslref=12680\"\n"
            + "\"60527\",\"www.bioinfo.org.uk/html/b60527.htm\",\"Bullock, J.A.\",\"1992\",\"Host Plants of British Beetles: A List of Recorded Associations\",\"Book/Report\",\"\",\"147501\",\"\",\"Amateur Entomologists' Society\",\"AES\",\"\",\"Publisher\",\"\",\"11a\",\"\",\"\",\"24\",\"0 900054 56 5\",\"\"\n"
            + "\"150095\",\"www.bioinfo.org.uk/html/b150095.htm\",\"\",\"2009\",\"Verbena downy mildew\",\"Paper\",\"\",\"150094\",\"FERA\",\"Plant Clinic News\",\"\",\"\",\"Journal\",\"\",\"Sept 09\",\"\",\"1\",\"\",\"\",\"\"\n";

    Map<String, String> citationById = StudyImporterForBioInfo.buildRefMap(createParser(referenceCsv));

    // Web pages: title followed by the access URL.
    assertThat(citationById.get("149326"), is("Agrobacterium tumefaciens. Accessed at: http://en.wikipedia.org/Agrobacterium_tumefaciens"));
    assertThat(citationById.get("149878"), is("The Marine Life Information Network for Britain and Ireland (MarLIN). Accessed at: http://www.marlin.ac.uk"));
    // Papers: title, source title, year, volume and page range.
    assertThat(citationById.get("150118"), is("Bacterial bleeding canker of horse chestnut. Plant Clinic News. 2008. Vol May 08. pp 2"));
    assertThat(citationById.get("150095"), is("Verbena downy mildew. Plant Clinic News. 2009. Vol Sept 09. pp 1"));
    // Book/report with an author.
    assertThat(citationById.get("60527"), is("Bullock, J.A.. 1992. Host Plants of British Beetles: A List of Recorded Associations. Amateur Entomologists' Society. Vol 11a"));
}
Also used : JUnitMatchers.containsString(org.junit.matchers.JUnitMatchers.containsString) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Test(org.junit.Test)

Example 64 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForBlewettTest method importLines.

/**
 * Runs {@code StudyImporterForBlewett} against two in-memory CSV fixtures - a
 * predator/prey abundance table and a collection date/location table - and verifies
 * the imported specimens, their diet relationships, collection time and location.
 */
@Test
public void importLines() throws StudyImporterException, NodeFactoryException {
    // Abundance table: one predator per row, prey counts in the per-taxon columns.
    String abundanceCsv =
            "\"Collection #\",\"Sp#\",\"Standard Length\",\"ID\",\"Far duoraum\",\"Cal sapidus\",\"Unid fish\",\"Anchoa spp\",\"Mug gyrans\",\"Bai chrysoura\",\"Portunus spp\",\"Bivalves\",\"Portunidae\",\"Lag rhomboides\",\"Xanthidae\",\"Palaemonidae\",\"Eucinostomus spp\",\"Mugil spp\",\"Alpheidae\",\"Atherinidae\",\"Syn foetens\",\"Ort chrysoptera\",\"Snails\",\"Euc gula\",\"Cynoscion spp\",\"Cyp. Variegatus\",\"Fun majalis\",\"Poe latipinna\",\"Unid crab\",\"Har jaguana\",\"Arm mierii\",\"Fun grandis\",\"Mic gulosus\",\"Ari felis\",\"Clupeidae\",\"Fundulus spp\",\"Diapterus/Eugerres spp\",\"Isopods\",\"Cyn nebulosus\",\"Opi oglinum\",\"Flo carpio\",\"Luc parva\",\"Uca spp\",\"Majidae\",\"Mug cephalus\",\"Squ empusa\",\"Opi robinsi\",\"Ariidae\",\"Sci ocellatus\",\"Unid shrimp\",\"Uca thayeri\",\"Grapsidae\",\"Lei xanthurus\",\"Elo saurus\",\"Brevoortia spp\"\n"
            + "\"CHD01101502\",1,549,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHD01102504\",1,548,\"E\",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHD01102504\",2,550,,3,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHM000152\",1,580,\"E\",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
            + "\"CHM000152\",2,556,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,";
    // Date/location table keyed by collection number.
    String dateLocationCsv =
            "\"Collection #\",\"Longitude\",\"Latitude\",\"Time\",\"Date\",\"Temperature\",\"Salinity\"\n"
            + "\"CHD01101502\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHD01102504\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHM000151\",-82.1625,26.72,10:55:00,1-Mar-00,22.4,33.8\n"
            + "\"CHM000152\",-82.103833,26.651833,12:40:00,1-Mar-00,24.8,30.3\n"
            + "\"CHM000153\",-82.087333,26.644833,13:40:00,1-Mar-00,25.1,30.1\n"
            + "\"CHM000154\",-82.083167,26.671167,14:40:00,1-Mar-00,26,30.4\n"
            + "\"CHM000175\",-82.197833,26.688167,10:00:00,8-Mar-00,22.2,35.05\n"
            + "\"CHM000176\",-82.191333,26.667333,11:00:00,8-Mar-00,22.7,35.25";

    final TestParserFactory abundanceFactory = new TestParserFactory(abundanceCsv);
    final TestParserFactory dateLocationFactory = new TestParserFactory(dateLocationCsv);

    // Serve the abundance fixture for resources whose name contains "abundance",
    // and the date/location fixture for everything else.
    ParserFactory routingFactory = new ParserFactory() {

        @Override
        public LabeledCSVParser createParser(String studyResource, String characterEncoding) throws IOException {
            TestParserFactory delegate = studyResource.contains("abundance")
                    ? abundanceFactory
                    : dateLocationFactory;
            return delegate.createParser(studyResource, characterEncoding);
        }
    };

    StudyImporterTestFactory importerFactory = new StudyImporterTestFactory(routingFactory, nodeFactory);
    StudyImporter importer = importerFactory.instantiateImporter((Class) StudyImporterForBlewett.class);
    importStudy(importer);

    Study study = getStudySingleton(getGraphDb());
    Iterable<Relationship> collectedRels = NodeUtil.getSpecimens(study);

    // First collected specimen: length 549, ate "Lag rhomboides".
    Relationship firstCollected = collectedRels.iterator().next();
    Date collectedAt = nodeFactory.getUnixEpochProperty(new SpecimenNode(firstCollected.getEndNode()));
    assertThat(collectedAt, is(not(nullValue())));
    assertThat(dateToString(collectedAt), is("2000-03-01T10:55:00.000-06:00"));

    Node firstPredator = firstCollected.getEndNode();
    assertThat((String) firstPredator.getProperty(SpecimenConstant.LIFE_STAGE_LABEL), is("post-juvenile adult stage"));
    assertThat((String) firstPredator.getProperty(SpecimenConstant.LIFE_STAGE_ID), is("UBERON:0000113"));
    assertThat((Double) firstPredator.getProperty(SpecimenConstant.LENGTH_IN_MM), is(549.0));

    Node predatorTaxon = firstPredator
            .getRelationships(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING)
            .iterator()
            .next()
            .getEndNode();
    assertThat((String) predatorTaxon.getProperty(PropertyAndValueDictionary.NAME), is("Centropomus undecimalis"));

    Iterable<Relationship> firstDiet = firstPredator.getRelationships(NodeUtil.asNeo4j(InteractType.ATE), Direction.OUTGOING);
    Node prey = firstDiet.iterator().next().getEndNode();
    assertThat(prey, is(not(nullValue())));
    Node preyTaxon = prey
            .getRelationships(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING)
            .iterator()
            .next()
            .getEndNode();
    assertThat(preyTaxon, is(not(nullValue())));
    assertThat((String) preyTaxon.getProperty(PropertyAndValueDictionary.NAME), is("Lag rhomboides"));

    // Second collected specimen: length 548, no diet relationships.
    Iterator<Relationship> collectedIterator = collectedRels.iterator();
    collectedIterator.next();
    Relationship secondCollected = collectedIterator.next();
    Node secondPredator = secondCollected.getEndNode();
    assertThat((Double) secondPredator.getProperty(SpecimenConstant.LENGTH_IN_MM), is(548.0));
    Iterable<Relationship> secondDiet = secondPredator.getRelationships(NodeUtil.asNeo4j(InteractType.ATE), Direction.OUTGOING);
    assertThat(secondDiet.iterator().hasNext(), is(false));

    // Exactly three specimens are expected at the CHM000152 coordinates.
    Location location = nodeFactory.findLocation(new LocationImpl(26.651833, -82.103833, 0.0, null));
    assertThat(location, is(not(nullValue())));
    Iterator<Relationship> caughtHere = NodeUtil.getSpecimenCaughtHere(location).iterator();
    for (int expected = 0; expected < 3; expected++) {
        assertThat(caughtHere.hasNext(), is(true));
        caughtHere.next();
    }
    assertThat(caughtHere.hasNext(), is(false));
}
Also used : Study(org.eol.globi.domain.Study) Node(org.neo4j.graphdb.Node) SpecimenNode(org.eol.globi.domain.SpecimenNode) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) Date(java.util.Date) Relationship(org.neo4j.graphdb.Relationship) LocationImpl(org.eol.globi.domain.LocationImpl) Location(org.eol.globi.domain.Location) Test(org.junit.Test)

Example 65 with LabeledCSVParser

use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.

the class StudyImporterForBioInfoTest method parseSomeRelations.

/**
 * Imports the {@code RELATIONS_STRING} fixture through
 * {@code StudyImporterForBioInfo.createRelations} with two known citations and checks
 * the resulting studies, specimens, interactions and indexed taxa.
 *
 * @throws IOException            if the fixture parser cannot be created
 * @throws StudyImporterException if the import itself fails
 */
@Test
public void parseSomeRelations() throws IOException, StudyImporterException {
    // Precondition: nothing has been indexed yet.
    assertThat(taxonIndex.findTaxonByName("Homo sapiens"), is(nullValue()));

    LabeledCSVParser labeledCSVParser = createParser(RELATIONS_STRING);
    StudyImporterForBioInfo importer = new StudyImporterForBioInfo(new ParserFactoryLocal(), nodeFactory);

    // Plain map instead of double-brace initialization, which would create an
    // anonymous HashMap subclass capturing the enclosing test instance.
    Map<String, String> citationsByRefId = new HashMap<>();
    citationsByRefId.put("60527", "citation A");
    citationsByRefId.put("60536", "citation B");
    importer.createRelations(labeledCSVParser, citationsByRefId, new HashMap<>());
    resolveNames();

    Study study = nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:60536");
    assertNotNull(study);
    assertThat(study.getExternalId(), is("http://bioinfo.org.uk/html/b60536.htm"));
    assertNull(nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:bla"));

    Study study1 = nodeFactory.findStudy(TaxonomyProvider.BIO_INFO + "ref:60527");
    // Check for null before dereferencing, so a missing study fails with a clear
    // assertion message instead of a NullPointerException.
    assertThat(study1, is(notNullValue()));
    assertThat(study1.getCitation(), is("citation A"));

    Iterable<Relationship> specimens = NodeUtil.getSpecimens(study1);
    List<Node> specimenList = new ArrayList<>();
    for (Relationship specimen : specimens) {
        Node specimenNode = specimen.getEndNode();
        // Every specimen is classified and takes part in an interaction in both directions.
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING), is(notNullValue()));
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(InteractType.INTERACTS_WITH), Direction.OUTGOING), is(notNullValue()));
        assertThat(specimenNode.getSingleRelationship(NodeUtil.asNeo4j(InteractType.INTERACTS_WITH), Direction.INCOMING), is(notNullValue()));
        specimenList.add(specimenNode);
    }
    assertThat(specimenList.size(), is(16));

    Relationship classifiedAs = specimenList.get(0).getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING);
    assertThat(classifiedAs, is(notNullValue()));
    assertThat((String) classifiedAs.getEndNode().getProperty(PropertyAndValueDictionary.EXTERNAL_ID), is("NBN:NBNSYS0000003949"));
    assertThat(specimenList.get(1).getSingleRelationship(NodeUtil.asNeo4j(RelTypes.CLASSIFIED_AS), Direction.OUTGOING), is(notNullValue()));

    assertThat(taxonIndex.findTaxonById(TaxonomyProvider.NBN.getIdPrefix() + "NBNSYS0000024889"), is(notNullValue()));
    assertThat(taxonIndex.findTaxonById(TaxonomyProvider.NBN.getIdPrefix() + "NBNSYS0000024891"), is(notNullValue()));
}
Also used : Study(org.eol.globi.domain.Study) Relationship(org.neo4j.graphdb.Relationship) Node(org.neo4j.graphdb.Node) SpecimenNode(org.eol.globi.domain.SpecimenNode) ArrayList(java.util.ArrayList) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) JUnitMatchers.containsString(org.junit.matchers.JUnitMatchers.containsString) Test(org.junit.Test)

Aggregations

LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser)82 IOException (java.io.IOException)40 Test (org.junit.Test)31 Study (org.eol.globi.domain.Study)24 StudyImpl (org.eol.globi.domain.StudyImpl)17 Specimen (org.eol.globi.domain.Specimen)15 HashMap (java.util.HashMap)13 ArrayList (java.util.ArrayList)12 Location (org.eol.globi.domain.Location)12 TaxonImpl (org.eol.globi.domain.TaxonImpl)12 CSVParser (com.Ostermiller.util.CSVParser)10 StringReader (java.io.StringReader)8 LocationImpl (org.eol.globi.domain.LocationImpl)8 Taxon (org.eol.globi.domain.Taxon)8 InteractType (org.eol.globi.domain.InteractType)7 File (java.io.File)6 FileInputStream (java.io.FileInputStream)6 InputStream (java.io.InputStream)6 Date (java.util.Date)6 List (java.util.List)6