use of org.gbif.ipt.model.ExtensionMapping in project ipt by gbif.
the class ExtensionManagerImpl method migrateResourceToNewExtensionVersion.
/**
* Migrates a resource's extension mappings from one version of an extension to a newer version of that extension.
*
* @param r resource whose mappings must be migrated
* @param current current version of the extension
* @param newer newer version of the extension to migrate mappings to
*/
protected void migrateResourceToNewExtensionVersion(Resource r, Extension current, Extension newer) {
// sanity check: both extensions must share the same rowType, and the resource must actually have mappings against it
if (!current.getRowType().equalsIgnoreCase(newer.getRowType()) || r.getMappings(current.getRowType()).isEmpty()) {
throw new IllegalStateException("Extensions must share the same rowType, and the resource must have mappings to migrate");
}
LOG.info("Migrating " + r.getShortname() + " mappings against extension " + current.getRowType() + " to the latest extension version");
// populate various sets to keep track of how many terms were deprecated, how many terms' vocabularies were updated, etc.
Set<ExtensionProperty> deprecated = new HashSet<>();
Set<ExtensionProperty> vocabulariesRemoved = new HashSet<>();
Set<ExtensionProperty> vocabulariesUnchanged = new HashSet<>();
Set<ExtensionProperty> vocabulariesUpdated = new HashSet<>();
for (ExtensionProperty property : current.getProperties()) {
// does the newer extension still contain this property?
if (!newer.hasProperty(property.qualifiedName())) {
deprecated.add(property);
} else if (property.getVocabulary() != null) {
// the property survived, so check whether the newer extension uses a newer version of its vocabulary
Vocabulary v1 = property.getVocabulary();
Vocabulary v2 = newer.getProperty(property.qualifiedName()).getVocabulary();
if (v2 == null) {
// case 1: vocabulary removed in newer version
vocabulariesRemoved.add(property);
} else if (v1.getUriString().equalsIgnoreCase(v2.getUriString())) {
// case 2: vocabulary unchanged between versions
vocabulariesUnchanged.add(property);
} else {
// case 3: vocabulary has been updated in newer version
vocabulariesUpdated.add(property);
}
}
}
LOG.debug(deprecated.size() + " properties have been deprecated in the newer version");
LOG.debug(vocabulariesRemoved.size() + " properties in the newer version of the extension no longer use a vocabulary");
LOG.debug(vocabulariesUnchanged.size() + " properties in the newer version of the extension use the same vocabulary");
LOG.debug(vocabulariesUpdated.size() + " properties in the newer version of the extension use a newer vocabulary");
// set of new terms (terms to add)
Set<ExtensionProperty> added = new HashSet<>();
for (ExtensionProperty property : newer.getProperties()) {
// does the older extension contain this property?
if (!current.hasProperty(property.qualifiedName())) {
added.add(property);
}
}
LOG.debug("Newer version of extension has " + added.size() + " new properties");
for (ExtensionMapping extensionMapping : r.getMappings(current.getRowType())) {
migrateExtensionMapping(extensionMapping, newer, deprecated);
}
}
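For context, a minimal sketch of how this migration hook might be driven when a newer version of an extension is installed. The surrounding resourceManager calls (list(), save()) are assumed names for illustration, not necessarily the exact IPT API:

// Hypothetical driver: migrate every resource mapped to the old extension version.
public void migrateAllResources(Extension installed, Extension latest) {
  for (Resource r : resourceManager.list()) {
    // only resources that actually map the extension's rowType need migrating
    if (!r.getMappings(installed.getRowType()).isEmpty()) {
      migrateResourceToNewExtensionVersion(r, installed, latest);
      resourceManager.save(r); // persist the migrated mappings (assumed API)
    }
  }
}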
use of org.gbif.ipt.model.ExtensionMapping in project ipt by gbif.
the class ResourceManagerImpl method createFromArchive.
private Resource createFromArchive(String shortname, File dwca, User creator, ActionLogger alog) throws AlreadyExistingException, ImportException, InvalidFilenameException {
Objects.requireNonNull(shortname);
// check if a resource with this shortname already exists
if (get(shortname) != null) {
throw new AlreadyExistingException();
}
Resource resource;
try {
// try to read dwca
Archive arch = DwcFiles.fromLocation(dwca.toPath());
if (arch.getCore() == null) {
alog.error("manage.resource.create.core.invalid");
throw new ImportException("Darwin Core Archive is invalid and does not have a core mapping");
}
if (arch.getCore().getRowType() == null) {
alog.error("manage.resource.create.core.invalid.rowType");
throw new ImportException("Darwin Core Archive is invalid, core mapping has no rowType");
}
// keep track of source files as a dwca might refer to the same source file multiple times
Map<String, TextFileSource> sources = new HashMap<>();
// determine core type for the resource based on the rowType
Term coreRowType = arch.getCore().getRowType();
CoreRowType resourceType;
if (coreRowType.equals(DwcTerm.Taxon)) {
resourceType = CoreRowType.CHECKLIST;
} else if (coreRowType.equals(DwcTerm.Occurrence)) {
resourceType = CoreRowType.OCCURRENCE;
} else if (coreRowType.equals(DwcTerm.Event)) {
resourceType = CoreRowType.SAMPLINGEVENT;
} else {
resourceType = CoreRowType.OTHER;
}
// create new resource
resource = create(shortname, resourceType.toString().toUpperCase(Locale.ENGLISH), creator);
// read core source+mappings
TextFileSource s = importSource(resource, arch.getCore());
sources.put(arch.getCore().getLocation(), s);
ExtensionMapping map = importMappings(alog, arch.getCore(), s);
resource.addMapping(map);
// when extensions are present, the core must contain an id element that identifies each record
if (!arch.getExtensions().isEmpty()) {
if (map.getIdColumn() == null) {
alog.error("manage.resource.create.core.invalid.id");
throw new ImportException("Darwin Core Archive is invalid, core mapping has no id element");
}
// read extension sources+mappings
for (ArchiveFile ext : arch.getExtensions()) {
if (sources.containsKey(ext.getLocation())) {
s = sources.get(ext.getLocation());
LOG.debug("SourceBase " + s.getName() + " shared by multiple extensions");
} else {
s = importSource(resource, ext);
sources.put(ext.getLocation(), s);
}
map = importMappings(alog, ext, s);
if (map.getIdColumn() == null) {
alog.error("manage.resource.create.core.invalid.coreid");
throw new ImportException("Darwin Core Archive is invalid, extension mapping has no coreId element");
}
// ensure the extension contains a coreId term mapping with the correct coreId index
if (resource.getCoreRowType() != null) {
updateExtensionCoreIdMapping(map, resource.getCoreRowType());
}
resource.addMapping(map);
}
}
// try to read metadata
Eml eml = readMetadata(resource.getShortname(), arch, alog);
if (eml != null) {
resource.setEml(eml);
}
// finally persist the whole thing
save(resource);
alog.info("manage.resource.create.success", new String[] { StringUtils.trimToEmpty(resource.getCoreRowType()), String.valueOf(resource.getSources().size()), String.valueOf(resource.getMappings().size()) });
} catch (UnsupportedArchiveException | InvalidConfigException | IOException e) {
alog.warn(e.getMessage(), e);
throw new ImportException(e);
}
return resource;
}
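The core rowType dispatch above is easy to exercise on its own. A minimal standalone sketch using the same DwcFiles.fromLocation call, assuming the dwca-io 2.x package layout (org.gbif.dwc); CoreTypeProbe and printCoreType are illustrative names:

import java.nio.file.Path;
import org.gbif.dwc.Archive;
import org.gbif.dwc.DwcFiles;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;

public class CoreTypeProbe {
  // Prints which IPT core type an archive's core rowType would map to.
  public static void printCoreType(Path dwcaLocation) throws Exception {
    Archive arch = DwcFiles.fromLocation(dwcaLocation);
    if (arch.getCore() == null || arch.getCore().getRowType() == null) {
      System.out.println("invalid archive: no core mapping or rowType");
      return;
    }
    Term coreRowType = arch.getCore().getRowType();
    String type;
    if (DwcTerm.Taxon.equals(coreRowType)) {
      type = "CHECKLIST";
    } else if (DwcTerm.Occurrence.equals(coreRowType)) {
      type = "OCCURRENCE";
    } else if (DwcTerm.Event.equals(coreRowType)) {
      type = "SAMPLINGEVENT";
    } else {
      type = "OTHER";
    }
    System.out.println(coreRowType + " -> " + type);
  }
}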
use of org.gbif.ipt.model.ExtensionMapping in project ipt by gbif.
the class ResourceManagerImpl method loadFromDir.
/**
* Reads a complete resource configuration (resource config & eml) from the resource config folder
* and returns the Resource instance for the internal in-memory cache.
*/
private Resource loadFromDir(File resourceDir, @Nullable User creator, ActionLogger alog) throws InvalidConfigException {
if (resourceDir.exists()) {
// load full configuration from resource.xml and eml.xml files
String shortname = resourceDir.getName();
try {
File cfgFile = dataDir.resourceFile(shortname, PERSISTENCE_FILE);
Resource resource;
// try-with-resources ensures the config file handle is always closed
try (InputStream input = new FileInputStream(cfgFile)) {
resource = (Resource) xstream.fromXML(input);
}
// populate missing creator - it cannot be null! (this fixes issue #1309)
if (creator != null && resource.getCreator() == null) {
resource.setCreator(creator);
LOG.warn("On load, populated missing creator for resource: " + shortname);
}
// non-existing users end up as NULL entries in the set, so remove them
// shouldn't really happen - but a manually edited config can cause this mess
resource.getManagers().remove(null);
// if a non-Taxon core mapping is using auto-generated IDs, the coreID is reset to No ID (-99)
for (ExtensionMapping ext : resource.getMappings()) {
Extension x = ext.getExtension();
if (x == null) {
alog.warn("manage.resource.create.extension.null");
throw new InvalidConfigException(TYPE.INVALID_EXTENSION, "Resource references non-existent extension");
} else if (extensionManager.get(x.getRowType()) == null) {
alog.warn("manage.resource.create.rowType.null", new String[] { x.getRowType() });
throw new InvalidConfigException(TYPE.INVALID_EXTENSION, "Resource references non-installed extension");
}
// is this ExtensionMapping a core mapping, but not a Taxon core, with an ID column set?
if (ext.isCore() && !ext.isTaxonCore() && ext.getIdColumn() != null) {
if (ext.getIdColumn().equals(ExtensionMapping.IDGEN_LINE_NUMBER) || ext.getIdColumn().equals(ExtensionMapping.IDGEN_UUID)) {
ext.setIdColumn(ExtensionMapping.NO_ID);
}
}
}
// the shortname is persisted as the folder name, so xstream doesn't handle it:
resource.setShortname(shortname);
// infer coreType if null
if (resource.getCoreType() == null) {
inferCoreType(resource);
}
// standardize subtype if not null
if (resource.getSubtype() != null) {
standardizeSubtype(resource);
}
// add proper source file pointer
for (Source src : resource.getSources()) {
src.setResource(resource);
if (src instanceof FileSource) {
FileSource frSrc = (FileSource) src;
frSrc.setFile(dataDir.sourceFile(resource, frSrc));
}
}
// pre v2.2 resources: set IdentifierStatus if null
if (resource.getIdentifierStatus() == null) {
resource.setIdentifierStatus(IdentifierStatus.UNRESERVED);
}
// load eml (this must be done before trying to convert version below)
loadEml(resource);
// pre v2.2 resources: convert resource version from integer to major_version.minor_version style
// and convert/rename versioned eml, rtf, and dwca files accordingly
BigDecimal converted = convertVersion(resource);
if (converted != null) {
updateResourceVersion(resource, resource.getEmlVersion(), converted);
}
// pre v2.2 resources: construct a VersionHistory for last published version (if appropriate)
VersionHistory history = constructVersionHistoryForLastPublishedVersion(resource);
if (history != null) {
resource.addVersionHistory(history);
}
// pre v2.2.1 resources: rename dwca.zip to a versioned name like dwca-18.0.zip (where 18.0 is, for example, the last published version)
if (resource.getLastPublishedVersionsVersion() != null) {
renameDwcaToIncludeVersion(resource, resource.getLastPublishedVersionsVersion());
}
// update EML with latest resource basics (version and GUID)
syncEmlWithResource(resource);
LOG.debug("Read resource configuration for " + shortname);
return resource;
} catch (Exception e) {
LOG.error("Cannot read resource configuration for " + shortname, e);
throw new InvalidConfigException(TYPE.RESOURCE_CONFIG, "Cannot read resource configuration for " + shortname + ": " + e.getMessage());
}
}
return null;
}
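The ID-column fix-up inside the mapping loop above can be read as a standalone rule. A minimal sketch extracting it into a testable helper, using the ExtensionMapping constants referenced above; normalizeIdColumn is an illustrative name, not an IPT method:

import org.gbif.ipt.model.ExtensionMapping;

// Resets auto-generated ID columns (line number, UUID) on non-Taxon core mappings,
// mirroring the fix-up applied while loading a resource from disk.
static void normalizeIdColumn(ExtensionMapping m) {
  if (m.isCore() && !m.isTaxonCore() && m.getIdColumn() != null
      && (m.getIdColumn().equals(ExtensionMapping.IDGEN_LINE_NUMBER)
          || m.getIdColumn().equals(ExtensionMapping.IDGEN_UUID))) {
    m.setIdColumn(ExtensionMapping.NO_ID);
  }
}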
use of org.gbif.ipt.model.ExtensionMapping in project ipt by gbif.
the class GenerateDwca method addFieldsToArchive.
/**
* First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped
* term a field in the final archive. Static/default mappings are not stored for a field, since they are not
* expressed in meta.xml but instead get written to the data file.
*
* @param mappings list of ExtensionMapping
* @param af ArchiveFile
*
* @return set of conceptTerms that have been mapped (in all files) for a single Extension
*/
private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFile af) throws GeneratorException {
Set<Term> mappedConceptTerms = new HashSet<>();
for (ExtensionMapping m : mappings) {
// multi-value field delimiter, part of each source data configuration
String delimitedBy = StringUtils.trimToNull(m.getSource().getMultiValueFieldsDelimitedBy());
for (PropertyMapping pm : m.getFields()) {
Term term = TERM_FACTORY.findTerm(pm.getTerm().qualifiedName());
// ensure Extension has concept term
if (term != null && m.getExtension().getProperty(term) != null) {
if (af.hasTerm(term)) {
ArchiveField field = af.getField(term);
mappedConceptTerms.add(term);
// multi-value delimiter must be same across all sources
if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + m.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
}
} else {
if (pm.getIndex() == null || pm.getIndex() >= 0) {
log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + pm.getIndex() + ")");
af.addField(buildField(term, delimitedBy));
mappedConceptTerms.add(term);
}
}
}
}
// if Extension has datasetID concept term, check if resource DOI should be used as value for mapping
ExtensionProperty ep = m.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
if (ep != null && m.isDoiUsedForDatasetId()) {
log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
// include datasetID field in ArchiveFile
ArchiveField f = buildField(DwcTerm.datasetID, null);
af.addField(f);
// include datasetID field mapping in ExtensionMapping
PropertyMapping pm = new PropertyMapping(f);
pm.setTerm(ep);
m.getFields().add(pm);
// include datasetID in set of all terms mapped for Extension
mappedConceptTerms.add(DwcTerm.datasetID);
}
}
return mappedConceptTerms;
}
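buildField is called above but not shown. A plausible minimal sketch of such a helper, inferred from its call sites (a term plus an optional multi-value delimiter); this is an assumption, not the actual IPT implementation, and the field index is left unset because assignIndexesOrderedByExtension assigns it later:

import org.gbif.dwc.ArchiveField;
import org.gbif.dwc.terms.Term;

// Assumed helper: wraps a term (and optional multi-value delimiter) in an ArchiveField.
private ArchiveField buildField(Term term, String delimitedBy) {
  ArchiveField f = new ArchiveField();
  f.setTerm(term);
  f.setDelimitedBy(delimitedBy); // may be null when the source defines no multi-value delimiter
  return f;
}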
use of org.gbif.ipt.model.ExtensionMapping in project ipt by gbif.
the class GenerateDwca method addDataFile.
/**
* Adds a single data file for a list of extension mappings that must all be mapped to the same extension.
* <br/>
* The ID column is always the 1st column (index 0) and is always equal to the core record identifier that has been
* mapped (e.g. occurrenceID, taxonID, etc).
*
* @param mappings list of ExtensionMapping
* @param rowLimit maximum number of rows to write
* @throws IllegalArgumentException if not all mappings are mapped to the same extension
* @throws InterruptedException if the thread was interrupted
* @throws IOException if problems occurred while persisting new data files
* @throws GeneratorException if any problem was encountered writing data file
*/
public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLimit) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
checkForInterruption();
if (mappings == null || mappings.isEmpty()) {
return;
}
// update reporting
currRecords = 0;
currRecordsSkipped = 0;
Extension ext = mappings.get(0).getExtension();
currExtension = ext.getTitle();
// verify that all mappings share this extension
for (ExtensionMapping m : mappings) {
if (!ext.equals(m.getExtension())) {
throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + ext.getRowType());
}
}
// create new tab file with the help of the Archive class representing the core file or an extension
ArchiveFile af = ArchiveFile.buildTabFile();
af.setRowType(TERM_FACTORY.findTerm(ext.getRowType()));
af.setEncoding(CHARACTER_ENCODING);
af.setDateFormat("YYYY-MM-DD");
// in the generated file column 0 will be the id column
ArchiveField idField = new ArchiveField();
idField.setIndex(ID_COLUMN_INDEX);
af.setId(idField);
// find the union of all terms mapped and make them a field in the final archive
Set<Term> mappedConceptTerms = addFieldsToArchive(mappings, af);
// retrieve the ordered list of mapped ExtensionProperty
List<ExtensionProperty> propertyList = getOrderedMappedExtensionProperties(ext, mappedConceptTerms);
// reassign indexes ordered by Extension
assignIndexesOrderedByExtension(propertyList, af);
// total column count is equal to id column + mapped columns
int totalColumns = 1 + propertyList.size();
// create file name from extension name, with incremental suffix to resolve name conflicts (e.g. taxon.txt,
// taxon2.txt, taxon3.txt)
String extensionName = (ext.getName() == null) ? "f" : ext.getName().toLowerCase().replaceAll("\\s", "_");
String fn = createFileName(dwcaFolder, extensionName);
// open new file writer for single data file
File dataFile = new File(dwcaFolder, fn);
// ready to go through each mapping and dump the data
try (Writer writer = org.gbif.utils.file.FileUtils.startNewUtf8File(dataFile)) {
af.addLocation(dataFile.getName());
addMessage(Level.INFO, "Start writing data file for " + currExtension);
boolean headerWritten = false;
for (ExtensionMapping m : mappings) {
// prepare index ordered list of all output columns apart from id column
PropertyMapping[] inCols = new PropertyMapping[totalColumns];
for (ArchiveField f : af.getFields().values()) {
if (f.getIndex() != null && f.getIndex() > ID_COLUMN_INDEX) {
inCols[f.getIndex()] = m.getField(f.getTerm().qualifiedName());
}
}
// write the header line to the file only once
if (!headerWritten) {
writeHeaderLine(propertyList, totalColumns, af, writer);
headerWritten = true;
}
// write data (records) to file
dumpData(writer, inCols, m, totalColumns, rowLimit, resource.getDoi());
// store record number by extension rowType
recordsByExtension.put(ext.getRowType(), currRecords);
}
} catch (IOException e) {
// some error writing this file, report
log.error("Fatal DwC-A Generator Error encountered while writing data file", e);
// set last error report!
setState(e);
throw new GeneratorException("Error writing data file", e);
}
// add archive file to archive
if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(ext.getRowType())) {
archive.setCore(af);
} else {
archive.addExtension(af);
}
// final reporting
addMessage(Level.INFO, "Data file written for " + currExtension + " with " + currRecords + " records and " + totalColumns + " columns");
// how many records were skipped?
if (currRecordsSkipped > 0) {
addMessage(Level.WARN, "!!! " + currRecordsSkipped + " records were skipped for " + currExtension + " due to errors interpreting a line, or because the line was empty");
}
}
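createFileName is used above but not shown. A minimal sketch, assuming it simply appends an incrementing numeric suffix until the name is free (taxon.txt, taxon2.txt, taxon3.txt), matching the comment in addDataFile; the body is illustrative, not the actual IPT code:

import java.io.File;

// Assumed helper: returns the first file name of the form <name>.txt, <name>2.txt,
// <name>3.txt, ... that does not yet exist in the target folder.
private String createFileName(File folder, String name) {
  String fn = name + ".txt";
  int i = 2;
  while (new File(folder, fn).exists()) {
    fn = name + i++ + ".txt";
  }
  return fn;
}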