Search in sources :

Example 36 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

From the class NodeFactoryFactoryTransactingOnDatasetNeo4j2, method create.

/**
 * Creates a {@link NodeFactory} whose dataset and specimen writes are grouped into
 * batch transactions.
 *
 * <p>The returned factory opens a transaction on the first write and, every
 * {@code TRANSACTION_BATCH_SIZE_DEFAULT} writes, marks the current transaction
 * successful, closes it and opens a new one. Calling {@code close()} on the
 * returned factory marks it as closing and finishes the transaction that is open
 * at that time.
 *
 * <p>The factory itself is constructed inside a short-lived transaction obtained
 * from {@code graphServiceFactory}.
 *
 * @param service not used by this implementation; the graph service is obtained
 *                from {@code graphServiceFactory} instead
 * @return a node factory that manages its own batch transactions
 */
@Override
public NodeFactory create(GraphDatabaseService service) {
    GraphDatabaseService graphService = graphServiceFactory.getGraphService();
    try (Transaction tx = graphService.beginTx()) {
        NodeFactory nodeFactory = new NodeFactoryNeo4j2(graphService) {

            // Guards batchTx. An explicit lock is used instead of
            // AtomicReference#getAndUpdate because the update steps have side effects
            // (commit, close, begin) and getAndUpdate may re-apply its function when
            // threads contend, which would repeat those side effects.
            private final Object batchLock = new Object();

            // The currently open batch transaction, or null if none is open.
            // Named differently from the enclosing try-resource "tx" to avoid shadowing.
            private Transaction batchTx = null;

            final AtomicBoolean closing = new AtomicBoolean(false);

            // Number of writes seen so far; decides where batch boundaries fall.
            final AtomicLong counter = new AtomicLong(0);

            @Override
            public Dataset getOrCreateDataset(Dataset dataset) {
                if (closing.get()) {
                    throw new IllegalStateException("cannot create a dataset on closing node factory");
                }
                startBatchTransactionIfNeeded();
                return super.getOrCreateDataset(dataset);
            }

            /**
             * Counts one write and makes sure a batch transaction is open, rolling
             * over to a fresh transaction at every batch boundary.
             */
            void startBatchTransactionIfNeeded() {
                synchronized (batchLock) {
                    boolean atBatchBoundary = counter.getAndIncrement() % TRANSACTION_BATCH_SIZE_DEFAULT == 0;
                    if (atBatchBoundary && batchTx != null) {
                        Transaction finished = batchTx;
                        // Drop the reference first so a failure while finishing cannot
                        // leave an already-closed transaction behind for later reuse.
                        batchTx = null;
                        finished.success();
                        finished.close();
                    }
                    if (batchTx == null) {
                        batchTx = beginTx();
                    }
                }
            }

            private Transaction beginTx() {
                return graphServiceFactory.getGraphService().beginTx();
            }

            // NOTE(review): unlike getOrCreateDataset, this does not check the closing
            // flag, so a call after close() opens a new batch transaction - confirm
            // whether that is intended.
            @Override
            public SpecimenNode createSpecimen(Study study, Taxon taxon, RelTypes... types) throws NodeFactoryException {
                startBatchTransactionIfNeeded();
                return super.createSpecimen(study, taxon, types);
            }

            /**
             * Marks this factory as closing and finishes the open batch transaction, if any.
             */
            @Override
            public void close() {
                synchronized (batchLock) {
                    closing.set(true);
                    Transaction open = batchTx;
                    batchTx = null;
                    if (open != null) {
                        open.success();
                        open.close();
                    }
                }
            }
        };
        tx.success();
        return nodeFactory;
    }
}
Also used : GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) Study(org.eol.globi.domain.Study) Dataset(org.globalbioticinteractions.dataset.Dataset) Taxon(org.eol.globi.domain.Taxon) AtomicReference(java.util.concurrent.atomic.AtomicReference) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) Transaction(org.neo4j.graphdb.Transaction) NodeFactory(org.eol.globi.data.NodeFactory) RelTypes(org.eol.globi.domain.RelTypes) NodeFactoryNeo4j2(org.eol.globi.data.NodeFactoryNeo4j2)

Example 37 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class CmdGenerateReport method generateCollectionReport.

/**
 * Walks all studies in the graph, tallies collection-wide statistics, and stores
 * them on a newly created node that is added to the "reports" node index under
 * the collection name.
 *
 * @param reportCache backing store handed to makeOrRemake / makeOrRemakeString to
 *                    obtain the sets used for distinct counting
 */
private void generateCollectionReport(DB reportCache) {
    final Set<Long> taxonIds = makeOrRemake(reportCache, "distinctTaxonIds");
    final Set<Long> unmatchedTaxonIds = makeOrRemake(reportCache, "distinctTaxonIdsNoMatch");
    final Counter interactionTally = new Counter();
    final Counter studyTally = new Counter();
    final Set<String> sources = makeOrRemakeString(reportCache, "distinctSources");
    final Set<String> datasets = makeOrRemakeString(reportCache, "distinctDatasets");

    NodeUtil.findStudies(getGraphDb(), study -> {
        countInteractionsAndTaxa(taxonIds, interactionTally, unmatchedTaxonIds, study);
        studyTally.count();

        final Dataset origin = new StudyNode(study).getOriginatingDataset();
        if (origin == null) {
            // a study without an originating dataset contributes no source/dataset
            return;
        }
        // the dataset namespace feeds both the source set and the dataset set
        final String namespace = origin.getNamespace();
        sources.add(namespace);
        datasets.add(namespace);
    });

    final Node reportNode = getGraphDb().createNode();
    reportNode.setProperty(PropertyAndValueDictionary.COLLECTION, GLOBI_COLLECTION_NAME);
    // the raw tally is halved before being stored
    // NOTE(review): presumably each interaction is counted once per direction - confirm
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_INTERACTIONS, interactionTally.getCount() / 2);
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA, taxonIds.size());
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_DISTINCT_TAXA_NO_MATCH, unmatchedTaxonIds.size());
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_STUDIES, studyTally.getCount());
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_SOURCES, sources.size());
    reportNode.setProperty(PropertyAndValueDictionary.NUMBER_OF_DATASETS, datasets.size());

    getGraphDb()
            .index()
            .forNodes("reports")
            .add(reportNode, PropertyAndValueDictionary.COLLECTION, GLOBI_COLLECTION_NAME);
}
Also used: Dataset (org.globalbioticinteractions.dataset.Dataset), StudyNode (org.eol.globi.domain.StudyNode), Node (org.neo4j.graphdb.Node), TaxonNode (org.eol.globi.domain.TaxonNode)

Example 38 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForMetaTableIT method importNHMStatic.

@Test
public void importNHMStatic() throws IOException, StudyImporterException {
    final List<Map<String, String>> links = new ArrayList<Map<String, String>>();
    final InteractionListener interactionListener = links::add;
    final DatasetImporterForMetaTable.TableParserFactory tableFactory = (config, dataset) -> {
        String firstFewLines = "\"InteractionID\",\"InteractionURL\",\"Species1UUID\",\"Species1Name\",\"Species1LifeCycleStage\",\"Species1OrganismPart\",\"Species1Status\",\"InteractionType\",\"InteractionOntologyURL\",\"Species2UUID\",\"Species2Name\",\"Species2LifeCycleStage\",\"Species2OrganismPart\",\"Species2Status\",\"LocationUUID\",\"LocationName\",\"LocationCountryName\",\"ISO2\",\"Importance\",\"InteractionRecordType\",\"Reference\",\"ReferenceDOI\",\"Reference Page\",\"Notes\"\n" + "\"4bee827f-c9f5-4c0e-9db3-e40a6e4d8008\",\"http://phthiraptera.info/node/94209\",\"c8faa033-237b-40b9-9b76-d9e7fcff9238\",\"Menacanthus alaudae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"e275d77c-e993-4de0-981f-b3f39fd4da9b\",\"Acanthis flavirostris\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"310\",\"[REF: Palma, Price & Hellenthal, 1998:310]\"\n" + "\"80e66e7c-75db-467f-9a89-a11f94d58eb3\",\"http://phthiraptera.info/node/94210\",\"fe5b2e50-b414-41d9-840d-189e732b2ea5\",\"Ricinus fringillae flammeae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. Requires page number.\"\n" + "\"001ee8aa-dbab-43b8-9137-a61565ccf41b\",\"http://phthiraptera.info/node/94211\",\"ee17d179-9f60-4198-ac49-dc9dab3ae529\",\"Brueelia sibirica\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. 
Requires page number.\"\n" + "\"d0929673-2f4c-49ec-877f-116e74ea360e\",\"http://phthiraptera.info/node/94212\",\"46084bc3-cfbf-4e01-96f8-5ecb50bc5ff9\",\"Ricinus fringillae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"2027cf09-f15d-4c2b-be28-9cb00fabf308\",\"Acanthis flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"204\",\"[REF: Rheinwald, 1968:204]\"\n";
        return CSVTSVUtil.createLabeledCSVParser(CSVTSVUtil.createExcelCSVParse(IOUtils.toInputStream(firstFewLines, StandardCharsets.UTF_8)));
    };
    final String baseUrl = "https://raw.githubusercontent.com/globalbioticinteractions/natural-history-museum-london-interactions-bank/main";
    final String resource = baseUrl + "/globi.json";
    importAll(interactionListener, tableFactory, baseUrl, resource);
    assertThat(links.size(), is(4));
    for (Map<String, String> firstLine : links) {
        assertNotNull(firstLine.get(DatasetImporterForTSV.INTERACTION_TYPE_NAME));
        assertNotNull(firstLine.get(TaxonUtil.TARGET_TAXON_ID));
        assertNotNull(firstLine.get(TaxonUtil.TARGET_TAXON_NAME));
        assertNotNull(firstLine.get(TaxonUtil.SOURCE_TAXON_ID));
        assertNotNull(firstLine.get(TaxonUtil.SOURCE_TAXON_NAME));
    }
}
Also used : TaxonUtil(org.eol.globi.service.TaxonUtil) CSVTSVUtil(org.eol.globi.util.CSVTSVUtil) DatasetImpl(org.globalbioticinteractions.dataset.DatasetImpl) URL(java.net.URL) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) ResourceUtil(org.eol.globi.util.ResourceUtil) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) JsonNode(com.fasterxml.jackson.databind.JsonNode) URI(java.net.URI) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StringStartsWith.startsWith(org.hamcrest.core.StringStartsWith.startsWith) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) IsNot.not(org.hamcrest.core.IsNot.not) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test) IOException(java.io.IOException) StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) InputStream(java.io.InputStream) InteractionListener(org.eol.globi.process.InteractionListener) ArrayList(java.util.ArrayList) Matchers.containsString(org.hamcrest.Matchers.containsString) Map(java.util.Map) Test(org.junit.Test)

Example 39 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForRSSTest method titleIncludeExcludePattern.

/**
 * Checks title filtering for a feed configured with both an include and an exclude
 * pattern: a title is accepted only when it matches the include pattern and does
 * not match the exclude pattern.
 */
@Test
public void titleIncludeExcludePattern() throws StudyImporterException, IOException {
    final String feedConfig = String.join(", ",
            "{ \"url\": \"classpath:/org/eol/globi/data/rss_vertnet.xml\"",
            "\"include\": \".*(Arctos).*\"",
            "\"exclude\": \".*GGBN.*\"",
            "\"hasDependencies\": true }");
    final Dataset feed = datasetFor(feedConfig);

    // matches include, but also matches exclude
    assertFalse(DatasetImporterForRSS.shouldIncludeTitleInDatasetCollection("bla (Arctos) GGBN", feed));
    // matches include only
    assertTrue(DatasetImporterForRSS.shouldIncludeTitleInDatasetCollection("bla (Arctos)", feed));
    // matches neither pattern
    assertFalse(DatasetImporterForRSS.shouldIncludeTitleInDatasetCollection("bla", feed));
}
Also used : Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Example 40 with Dataset

use of org.globalbioticinteractions.dataset.Dataset in project eol-globi-data by jhpoelen.

the class DatasetImporterForRSSTest method readRSSVertnetWithoutConfig.

/**
 * Reads the bundled VertNet RSS feed with only a url configured and checks the
 * number of datasets found plus the properties of the first one.
 */
@Test
public void readRSSVertnetWithoutConfig() throws StudyImporterException, IOException {
    final Dataset feed = datasetFor("{ \"url\": \"classpath:/org/eol/globi/data/rss_vertnet.xml\" }");
    final List<Dataset> found = DatasetImporterForRSS.getDatasetsForFeed(feed);
    assertThat(found.size(), is(263));

    final String expectedArchive = "http://ipt.vertnet.org:8080/ipt/archive.do?r=utep_mamm";
    final Dataset first = found.get(0);
    assertThat(first.getOrDefault("hasDependencies", null), is("false"));
    assertThat(first.getOrDefault("url", null), is(expectedArchive));
    assertThat(first.getArchiveURI(), is(URI.create(expectedArchive)));
}
Also used : Dataset(org.globalbioticinteractions.dataset.Dataset) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Aggregations

Dataset (org.globalbioticinteractions.dataset.Dataset): 68, Test (org.junit.Test): 46, DatasetImpl (org.globalbioticinteractions.dataset.DatasetImpl): 25, URI (java.net.URI): 20, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 17, JsonNode (com.fasterxml.jackson.databind.JsonNode): 14, Matchers.containsString (org.hamcrest.Matchers.containsString): 14, IOException (java.io.IOException): 11, MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 10, InputStream (java.io.InputStream): 9, Is.is (org.hamcrest.core.Is.is): 9, StudyNode (org.eol.globi.domain.StudyNode): 8, Node (org.neo4j.graphdb.Node): 8, URL (java.net.URL): 7, ArrayList (java.util.ArrayList): 7, StringStartsWith.startsWith (org.hamcrest.core.StringStartsWith.startsWith): 7, Assert.assertNotNull (org.junit.Assert.assertNotNull): 7, BaseDatasetImporter (org.eol.globi.data.BaseDatasetImporter): 6, DatasetImporter (org.eol.globi.data.DatasetImporter): 6, DatasetNode (org.eol.globi.domain.DatasetNode): 6