use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.
the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToTaxa.
/**
 * Imports the resource relationship extension of the bundled
 * {@code inaturalist-dwca-rr.zip} sample archive and checks the fields of the
 * first emitted interaction, as well as the total number of emitted
 * interactions (exactly one).
 */
@Test
public void hasResourceRelationshipsOccurrenceToTaxa() throws IOException, URISyntaxException {
    final AtomicInteger linkCounter = new AtomicInteger(0);
    final URI archiveLocation = getClass().getResource("inaturalist-dwca-rr.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");

    importResourceRelationshipExtension(dwca, new InteractionListener() {
        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            final int linkNumber = linkCounter.incrementAndGet();
            if (linkNumber != 1) {
                // only the first link is inspected in detail
                return;
            }

            // source side of the interaction
            assertThat(interaction.get(TaxonUtil.SOURCE_TAXON_ID), is("http://www.inaturalist.org/taxa/465153"));
            assertThat(interaction.get(SOURCE_TAXON_NAME), is("Gorgonocephalus eucnemis"));
            assertThat(interaction.get(SOURCE_OCCURRENCE_ID), is("http://www.inaturalist.org/observations/2309983"));

            // interaction type and record basis
            assertThat(interaction.get(INTERACTION_TYPE_NAME), is("Eaten by"));
            assertThat(interaction.get(INTERACTION_TYPE_ID), is("http://www.inaturalist.org/observation_fields/879"));
            assertThat(interaction.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("HumanObservation"));

            // target side of the interaction
            assertThat(interaction.get(TaxonUtil.TARGET_TAXON_ID), is("http://www.inaturalist.org/taxa/133061"));
            assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is("Enhydra lutris kenyoni"));

            // provenance
            assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is("https://www.inaturalist.org/users/dpom"));
            assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence | http://rs.tdwg.org/dwc/terms/Taxon"));
        }
    });

    assertThat(linkCounter.get(), is(1));
}
use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.
the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToOccurrence.
/**
 * Imports the resource relationship extension of the bundled
 * {@code fmnh-rr-test.zip} sample archive. The first, second and eighth
 * emitted interactions are checked field by field; every interaction is
 * checked for a non-null reference citation and the expected resource types.
 * Eight interactions are expected in total.
 */
@Test
public void hasResourceRelationshipsOccurrenceToOccurrence() throws IOException, URISyntaxException {
    final AtomicInteger linkCounter = new AtomicInteger(0);
    final URI archiveLocation = getClass().getResource("fmnh-rr-test.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");

    importResourceRelationshipExtension(dwca, new InteractionListener() {
        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            final int linkNumber = linkCounter.incrementAndGet();

            switch (linkNumber) {
                case 1:
                    assertThat(interaction.get(relatedResourceID.qualifiedName()), is("http://n2t.net/ark:/65665/37d63a454-d948-4b1d-89db-89809887ef41"));
                    assertThat(interaction.get(SOURCE_TAXON_NAME), is("Trichobius parasparsus Wenzel, 1976"));
                    assertThat(interaction.get(SOURCE_OCCURRENCE_ID), is("8afec7db-7b19-44f7-8ac8-8d98614e71d2"));
                    assertThat(interaction.get(INTERACTION_TYPE_NAME), is("Ectoparasite of"));
                    assertThat(interaction.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("http://n2t.net/ark:/65665/37d63a454-d948-4b1d-89db-89809887ef41"));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_CATALOG_NUMBER), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_COLLECTION_CODE), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_INSTITUTION_CODE), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is("A. L. Tuttle | M. D. Tuttle"));
                    break;
                case 2:
                    assertThat(interaction.get(SOURCE_TAXON_NAME), is("Rhinolophus fumigatus aethiops"));
                    assertThat(interaction.get(SOURCE_OCCURRENCE_ID), is("7048675a-b110-4baf-91a3-2db138316709"));
                    assertThat(interaction.get(INTERACTION_TYPE_NAME), is("Host to"));
                    assertThat(interaction.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("10d8d814-2afc-4cf2-9843-a2b719346179"));
                    assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is("G. Heinrich"));
                    break;
                case 8:
                    assertThat(interaction.get(SOURCE_OCCURRENCE_ID), is("3efb94e7-5182-4dd3-bec5-aa838ba22b4f"));
                    assertThat(interaction.get(SOURCE_TAXON_NAME), is("Thamnophis fulvus"));
                    assertThat(interaction.get(INTERACTION_TYPE_NAME), is("Stomach Contents of"));
                    assertThat(interaction.get(INTERACTION_TYPE_ID), is(nullValue()));
                    assertThat(interaction.get(DatasetImporterForTSV.TARGET_OCCURRENCE_ID), is("5c419063-682a-4b3f-8a27-9ed286717922"));
                    assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is("Thamnophis fulvus"));
                    assertThat(interaction.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                    assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is("C. M. Barber"));
                    break;
                default:
                    // remaining links only receive the generic checks below
                    break;
            }

            // checks that apply to every emitted link
            assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is(notNullValue()));
            assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
        }
    });

    assertThat(linkCounter.get(), is(8));
}
use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.
the class DatasetImporterForDwCATest method hasResourceRelationshipsOccurrenceToOccurrenceRemarks.
/**
 * Imports the resource relationship extension of the bundled
 * {@code fmnh-rr-remarks-test.zip} sample archive. The first emitted
 * interaction is checked field by field (including a target taxon name of
 * "Donald duckus"); every interaction is checked for a non-null reference
 * citation and the expected resource types. Exactly one interaction is
 * expected.
 */
@Test
public void hasResourceRelationshipsOccurrenceToOccurrenceRemarks() throws IOException, URISyntaxException {
    final AtomicInteger linkCounter = new AtomicInteger(0);
    final URI archiveLocation = getClass().getResource("fmnh-rr-remarks-test.zip").toURI();
    final Archive dwca = DwCAUtil.archiveFor(archiveLocation, "target/tmp");

    importResourceRelationshipExtension(dwca, new InteractionListener() {
        @Override
        public void on(Map<String, String> interaction) throws StudyImporterException {
            final int linkNumber = linkCounter.incrementAndGet();
            final boolean isFirstLink = linkNumber == 1;

            if (isFirstLink) {
                assertThat(interaction.get(SOURCE_TAXON_NAME), is("Trichobius parasparsus Wenzel, 1976"));
                assertThat(interaction.get(SOURCE_OCCURRENCE_ID), is("8afec7db-7b19-44f7-8ac8-8d98614e71d2"));
                assertThat(interaction.get(INTERACTION_TYPE_NAME), is("Ectoparasite of"));
                assertThat(interaction.get(INTERACTION_TYPE_ID), is(nullValue()));
                assertThat(interaction.get(DatasetImporterForTSV.BASIS_OF_RECORD_NAME), is("PreservedSpecimen"));
                assertThat(interaction.get(TaxonUtil.TARGET_TAXON_NAME), is("Donald duckus"));
                assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is("A. L. Tuttle | M. D. Tuttle"));
            }

            // checks that apply to every emitted link
            assertThat(interaction.get(DatasetImporterForTSV.REFERENCE_CITATION), is(notNullValue()));
            assertThat(interaction.get(DatasetImporterForTSV.RESOURCE_TYPES), is("http://rs.tdwg.org/dwc/terms/ResourceRelationship | http://rs.tdwg.org/dwc/terms/Occurrence"));
        }
    });

    assertThat(linkCounter.get(), is(1));
}
use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.
the class DatasetImporterForDwCA method importStudy.
/**
 * Reads the Darwin Core Archive associated with the current dataset and feeds
 * its description, resource relationship and associated taxa extensions, and
 * finally its core records, to the interaction listeners.
 *
 * <p>The archive location is taken from the dataset's {@code "url"} setting,
 * falling back to the dataset's archive URI. When the location is not a local
 * directory (per {@code CacheUtil.isLocalDir}), the archive is first retrieved
 * into a temporary zip file. Temporary files and directories created here are
 * removed before the method returns.
 *
 * @throws IllegalArgumentException if no dataset is configured
 * @throws StudyImporterException   if the archive cannot be read
 */
@Override
public void importStudy() throws StudyImporterException {
    // Validate before the first dereference: previously the dataset was
    // dereferenced ahead of this check, so a missing dataset surfaced as a
    // NullPointerException and this message could never be reached.
    if (getDataset() == null) {
        throw new IllegalArgumentException("no dataset found");
    }
    URI archiveURI = getDataset().getArchiveURI();
    Path tmpDwA = null;
    Thread deleteOnShutdownHook = null;
    try {
        // An explicit "url" setting takes precedence over the dataset's archive URI.
        String archiveURL = getDataset().getOrDefault("url", archiveURI == null ? null : archiveURI.toString());
        getLogger().info(null, "[" + archiveURL + "]: indexing interaction records");
        File dwcaFile = null;
        try {
            URI dwcaURI = URI.create(archiveURL);
            tmpDwA = Files.createTempDirectory("dwca");
            final File tmpDir = tmpDwA.toFile();
            // Hook is paired with removeDeleteOnShutdownHook in the finally block below.
            deleteOnShutdownHook = addDeleteOnShutdownHook(tmpDir);
            Archive archive;
            if (CacheUtil.isLocalDir(dwcaURI)) {
                archive = DwCAUtil.archiveFor(dwcaURI, tmpDwA.toString());
            } else {
                // Not a local directory: copy the retrieved content to a temporary zip first.
                dwcaFile = File.createTempFile("dwca", "tmp.zip");
                FileUtils.copyToFile(getDataset().retrieve(dwcaURI), dwcaFile);
                dwcaFile.deleteOnExit();
                archive = DwCAUtil.archiveFor(dwcaFile.toURI(), tmpDwA.toString());
            }
            InteractionListenerWithContext listenerWithContext = new InteractionListenerWithContext();
            try (InteractionListenerClosable referencingListener = createReferenceEnricher(archive, listenerWithContext)) {
                // Extensions go through the reference-enriching listener;
                // core records go to the context listener directly.
                importDescriptionExtension(archive, referencingListener, getLogger());
                importResourceRelationshipExtension(archive, referencingListener);
                importAssociatedTaxaExtension(archive, referencingListener);
                int i = importCore(archive, listenerWithContext);
                getLogger().info(null, "[" + archiveURL + "]: scanned [" + i + "] record(s)");
            }
        } finally {
            removeDeleteOnShutdownHook(deleteOnShutdownHook);
            if (dwcaFile != null && dwcaFile.exists() && dwcaFile.isFile()) {
                FileUtils.deleteQuietly(dwcaFile);
            }
        }
    } catch (IOException | IllegalStateException e) {
        // see https://github.com/globalbioticinteractions/globalbioticinteractions/issues/409
        throw new StudyImporterException("failed to read archive [" + archiveURI + "]", e);
    } finally {
        // Always remove the temporary working directory, on success or failure.
        if (tmpDwA != null) {
            org.apache.commons.io.FileUtils.deleteQuietly(tmpDwA.toFile());
        }
    }
}
use of org.gbif.dwc.Archive in project eol-globi-data by jhpoelen.
the class DwCAUtil method archiveFor.
/**
 * Opens the Darwin Core Archive found at the given URI.
 *
 * <p>If the URI resolves to a regular file, it is passed to
 * {@code DwcFiles.fromCompressed} together with {@code tmpDir} as the
 * extraction folder; otherwise the path is passed to
 * {@code DwcFiles.fromLocation}.
 *
 * @param archiveURI location of the archive; must be convertible to a {@link Path}
 * @param tmpDir     folder used when the archive is a file; must not be blank in that case
 * @return the opened archive
 * @throws IOException              if the archive format is not supported
 * @throws IllegalArgumentException if the archive is a file and {@code tmpDir} is blank
 */
public static Archive archiveFor(URI archiveURI, String tmpDir) throws IOException {
    final Path archivePath = Paths.get(archiveURI);
    try {
        if (!archivePath.toFile().isFile()) {
            // not a regular file: open the location as-is
            return DwcFiles.fromLocation(archivePath);
        }
        if (StringUtils.isBlank(tmpDir)) {
            throw new IllegalArgumentException("cannot read [" + archiveURI + "] without a tmpDir");
        }
        return DwcFiles.fromCompressed(archivePath, Paths.get(tmpDir));
    } catch (UnsupportedArchiveException ex) {
        throw new IOException("failed to read [" + archiveURI + "]", ex);
    }
}
Aggregations