Search in sources :

Example 1 with Archive

use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToTaxa.

/**
 * Imports the resource relationship extension of the bundled
 * {@code inaturalist-dwca-rr.zip} fixture and checks, field by field, the
 * first interaction handed to the listener. Also checks that exactly one
 * interaction is reported in total.
 */
@Test
public void hasResourceRelationshipsOccurrenceToTaxa() throws IOException, URISyntaxException {
    final URI archiveLocation = getClass().getResource("inaturalist-dwca-rr.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");
    final AtomicInteger linkCount = new AtomicInteger(0);

    importResourceRelationshipExtension(dwca, new InteractionListener() {

        @Override
        public void on(Map<String, String> link) throws StudyImporterException {
            // Every callback is counted; only the first one is inspected in detail.
            final int seenSoFar = linkCount.incrementAndGet();
            if (seenSoFar != 1) {
                return;
            }
            assertThat(link.get(TaxonUtil.SOURCE_TAXON_ID), is("http://www.inaturalist.org/taxa/465153"));
            assertThat(link.get(SOURCE_TAXON_NAME), is("Gorgonocephalus eucnemis"));
            assertThat(link.get(SOURCE_OCCURRENCE_ID), is("http://www.inaturalist.org/observations/2309983"));
            assertThat(link.get(INTERACTION_TYPE_NAME), is("Eaten by"));
            assertThat(link.get(INTERACTION_TYPE_ID), is("http://www.inaturalist.org/observation_fields/879"));
            assertThat(link.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("HumanObservation"));
            assertThat(link.get(TaxonUtil.TARGET_TAXON_ID), is("http://www.inaturalist.org/taxa/133061"));
            assertThat(link.get(TaxonUtil.TARGET_TAXON_NAME), is("Enhydra lutris kenyoni"));
            assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is("https://www.inaturalist.org/users/dpom"));
            assertThat(link.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence | http://rs.tdwg.org/dwc/terms/Taxon"));
        }
    });

    assertThat(linkCount.get(), is(1));
}
Also used : InteractionListener(org.eol.globi.process.InteractionListener) Archive(org.gbif.dwc.Archive) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) Test(org.junit.Test)

Example 2 with Archive

use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToOccurrence.

/**
 * Imports the resource relationship extension of the bundled
 * {@code fmnh-rr-test.zip} fixture. The first, second and eighth
 * interactions handed to the listener are checked field by field; every
 * interaction must carry a reference citation and the expected resource
 * types. Exactly eight interactions are expected in total.
 */
@Test
public void hasResourceRelationshipsOccurrenceToOccurrence() throws IOException, URISyntaxException {
    final URI archiveLocation = getClass().getResource("fmnh-rr-test.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");
    final AtomicInteger linkCount = new AtomicInteger(0);

    importResourceRelationshipExtension(dwca, new InteractionListener() {

        @Override
        public void on(Map<String, String> link) throws StudyImporterException {
            final int position = linkCount.incrementAndGet();
            switch (position) {
                case 1:
                    assertThat(link.get(relatedResourceID.qualifiedName()), is("http://n2t.net/ark:/65665/37d63a454-d948-4b1d-89db-89809887ef41"));
                    assertThat(link.get(SOURCE_TAXON_NAME), is("Trichobius parasparsus Wenzel, 1976"));
                    assertThat(link.get(SOURCE_OCCURRENCE_ID), is("8afec7db-7b19-44f7-8ac8-8d98614e71d2"));
                    assertThat(link.get(INTERACTION_TYPE_NAME), is("Ectoparasite of"));
                    assertThat(link.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(link.get(TaxonUtil.TARGET_TAXON_NAME), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("http://n2t.net/ark:/65665/37d63a454-d948-4b1d-89db-89809887ef41"));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_CATALOG_NUMBER), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_COLLECTION_CODE), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_INSTITUTION_CODE), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is("A. L. Tuttle | M. D. Tuttle"));
                    break;
                case 2:
                    assertThat(link.get(SOURCE_TAXON_NAME), is("Rhinolophus fumigatus aethiops"));
                    assertThat(link.get(SOURCE_OCCURRENCE_ID), is("7048675a-b110-4baf-91a3-2db138316709"));
                    assertThat(link.get(INTERACTION_TYPE_NAME), is("Host to"));
                    assertThat(link.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(link.get(TaxonUtil.TARGET_TAXON_NAME), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("10d8d814-2afc-4cf2-9843-a2b719346179"));
                    assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is("G. Heinrich"));
                    break;
                case 8:
                    assertThat(link.get(SOURCE_OCCURRENCE_ID), is("3efb94e7-5182-4dd3-bec5-aa838ba22b4f"));
                    assertThat(link.get(SOURCE_TAXON_NAME), is("Thamnophis fulvus"));
                    assertThat(link.get(INTERACTION_TYPE_NAME), is("Stomach Contents of"));
                    assertThat(link.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(link.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("5c419063-682a-4b3f-8a27-9ed286717922"));
                    assertThat(link.get(TaxonUtil.TARGET_TAXON_NAME), is("Thamnophis fulvus"));
                    assertThat(link.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is("C. M. Barber"));
                    break;
                default:
                    // Links 3 through 7 only get the checks shared by all links below.
                    break;
            }

            // Checks applied to every reported link, whatever its position.
            assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is(notNullValue()));
            assertThat(link.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
        }
    });

    assertThat(linkCount.get(), is(8));
}
Also used : InteractionListener(org.eol.globi.process.InteractionListener) Archive(org.gbif.dwc.Archive) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) Test(org.junit.Test)

Example 3 with Archive

use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToOccurrenceRemarks.

/**
 * Imports the resource relationship extension of the bundled
 * {@code fmnh-rr-remarks-test.zip} fixture. The first interaction handed to
 * the listener is checked field by field, including its target taxon name;
 * every interaction must carry a reference citation and the expected
 * resource types. Exactly one interaction is expected in total.
 */
@Test
public void hasResourceRelationshipsOccurrenceToOccurrenceRemarks() throws IOException, URISyntaxException {
    final URI archiveLocation = getClass().getResource("fmnh-rr-remarks-test.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");
    final AtomicInteger linkCount = new AtomicInteger(0);

    importResourceRelationshipExtension(dwca, new InteractionListener() {

        @Override
        public void on(Map<String, String> link) throws StudyImporterException {
            final int position = linkCount.incrementAndGet();
            final boolean isFirstLink = position == 1;
            if (isFirstLink) {
                assertThat(link.get(SOURCE_TAXON_NAME), is("Trichobius parasparsus Wenzel, 1976"));
                assertThat(link.get(SOURCE_OCCURRENCE_ID), is("8afec7db-7b19-44f7-8ac8-8d98614e71d2"));
                assertThat(link.get(INTERACTION_TYPE_NAME), is("Ectoparasite of"));
                assertThat(link.get(INTERACTION_TYPE_ID), is(nullValue()));
                assertThat(link.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                assertThat(link.get(TaxonUtil.TARGET_TAXON_NAME), is("Donald duckus"));
                assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is("A. L. Tuttle | M. D. Tuttle"));
            }

            // Checks applied to every reported link, whatever its position.
            assertThat(link.get(DatasetImporterForTSV.REFERENCE_CITATION), is(notNullValue()));
            assertThat(link.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
        }
    });

    assertThat(linkCount.get(), is(1));
}
Also used : InteractionListener(org.eol.globi.process.InteractionListener) Archive(org.gbif.dwc.Archive) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) URI(java.net.URI) Test(org.junit.Test)

Example 4 with Archive

use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.

the class DatasetImporterForDwCA method importStudy.

/**
 * Indexes the interaction records of the Darwin Core Archive associated
 * with the configured dataset.
 *
 * <p>The archive location is taken from the dataset's {@code "url"} setting,
 * defaulting to the dataset's archive URI. When the location is not a local
 * directory (per {@code CacheUtil.isLocalDir}), the archive is first copied
 * into a temporary zip file. The description, resource relationship and
 * associated taxa extensions are imported before the core records. The
 * temporary zip file and the temporary working directory are removed
 * before the method returns, whether or not the import succeeded.
 *
 * @throws StudyImporterException   if reading the archive fails with an
 *                                  {@link IOException} or {@link IllegalStateException}
 * @throws IllegalArgumentException if no dataset is configured
 */
@Override
public void importStudy() throws StudyImporterException {
    // Validate before the first dereference: the guard used to sit after
    // getDataset().getArchiveURI(), so a missing dataset surfaced as a bare
    // NullPointerException and this message was unreachable.
    if (getDataset() == null) {
        throw new IllegalArgumentException("no dataset found");
    }
    URI archiveURI = getDataset().getArchiveURI();
    Path tmpDwA = null;
    Thread deleteOnShutdownHook = null;
    try {
        String archiveURL = getDataset().getOrDefault("url", archiveURI == null ? null : archiveURI.toString());
        getLogger().info(null, "[" + archiveURL + "]: indexing interaction records");
        File dwcaFile = null;
        try {
            URI dwcaURI = URI.create(archiveURL);
            tmpDwA = Files.createTempDirectory("dwca");
            final File tmpDir = tmpDwA.toFile();
            // Safety net in case the JVM exits before the outer finally block runs.
            deleteOnShutdownHook = addDeleteOnShutdownHook(tmpDir);
            Archive archive;
            if (CacheUtil.isLocalDir(dwcaURI)) {
                archive = DwCAUtil.archiveFor(dwcaURI, tmpDwA.toString());
            } else {
                // Not a local directory: copy the archive to a local temporary zip first.
                dwcaFile = File.createTempFile("dwca", "tmp.zip");
                FileUtils.copyToFile(getDataset().retrieve(dwcaURI), dwcaFile);
                dwcaFile.deleteOnExit();
                archive = DwCAUtil.archiveFor(dwcaFile.toURI(), tmpDwA.toString());
            }
            InteractionListenerWithContext listenerWithContext = new InteractionListenerWithContext();
            try (InteractionListenerClosable referencingListener = createReferenceEnricher(archive, listenerWithContext)) {
                importDescriptionExtension(archive, referencingListener, getLogger());
                importResourceRelationshipExtension(archive, referencingListener);
                importAssociatedTaxaExtension(archive, referencingListener);
                int i = importCore(archive, listenerWithContext);
                getLogger().info(null, "[" + archiveURL + "]: scanned [" + i + "] record(s)");
            }
        } finally {
            // Cleanup happens right here, so the shutdown hook is no longer needed.
            removeDeleteOnShutdownHook(deleteOnShutdownHook);
            if (dwcaFile != null && dwcaFile.exists() && dwcaFile.isFile()) {
                FileUtils.deleteQuietly(dwcaFile);
            }
        }
    } catch (IOException | IllegalStateException e) {
        // see https://github.com/globalbioticinteractions/globalbioticinteractions/issues/409
        throw new StudyImporterException("failed to read archive [" + archiveURI + "]", e);
    } finally {
        if (tmpDwA != null) {
            org.apache.commons.io.FileUtils.deleteQuietly(tmpDwA.toFile());
        }
    }
}
Also used : Path(java.nio.file.Path) Archive(org.gbif.dwc.Archive) IOException(java.io.IOException) URI(java.net.URI) InteractionListenerClosable(org.eol.globi.process.InteractionListenerClosable) ArchiveFile(org.gbif.dwc.ArchiveFile) File(java.io.File)

Example 5 with Archive

use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.

the class DwCAUtil method archiveFor.

/**
 * Opens the Darwin Core Archive found at the given URI.
 *
 * <p>If the URI resolves to a regular file, it is handed to
 * {@code DwcFiles.fromCompressed} together with {@code tmpDir} as the target
 * folder; otherwise the path is handed to {@code DwcFiles.fromLocation}.
 *
 * @param archiveURI location of the archive; must be convertible to a
 *                   {@link Path} via {@link Paths#get(URI)}
 * @param tmpDir     folder passed to {@code DwcFiles.fromCompressed}; only
 *                   required when {@code archiveURI} points at a file
 * @return the opened archive
 * @throws IOException              if the archive is reported as unsupported
 * @throws IllegalArgumentException if {@code archiveURI} points at a file and
 *                                  {@code tmpDir} is blank
 */
public static Archive archiveFor(URI archiveURI, String tmpDir) throws IOException {
    final Path archivePath = Paths.get(archiveURI);
    try {
        final boolean isSingleFile = archivePath.toFile().isFile();
        if (!isSingleFile) {
            return DwcFiles.fromLocation(archivePath);
        }
        if (StringUtils.isBlank(tmpDir)) {
            throw new IllegalArgumentException("cannot read [" + archiveURI + "] without a tmpDir");
        }
        return DwcFiles.fromCompressed(archivePath, Paths.get(tmpDir));
    } catch (UnsupportedArchiveException e) {
        // Re-thrown as a checked exception, keeping the original as the cause.
        throw new IOException("failed to read [" + archiveURI + "]", e);
    }
}
Also used : Path(java.nio.file.Path) Archive(org.gbif.dwc.Archive) IOException(java.io.IOException) UnsupportedArchiveException(org.gbif.dwc.UnsupportedArchiveException)

Aggregations

Archive (org.gbif.dwc.Archive)11 URI (java.net.URI)10 Test (org.junit.Test)9 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)6 InteractionListener (org.eol.globi.process.InteractionListener)5 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 IOException (java.io.IOException)2 Path (java.nio.file.Path)2 File (java.io.File)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 InteractionListenerClosable (org.eol.globi.process.InteractionListenerClosable)1 ArchiveFile (org.gbif.dwc.ArchiveFile)1 UnsupportedArchiveException (org.gbif.dwc.UnsupportedArchiveException)1