Search in sources :

Example 1 with TextFileSource

use of org.gbif.ipt.model.TextFileSource in project ipt by gbif.

the class ResourceManagerImpl method createFromArchive.

/**
 * Builds a new resource from a Darwin Core Archive: reads the archive, creates the resource,
 * imports the core source and mapping plus every extension source and mapping, attaches
 * metadata when it can be read, and finally saves the resource.
 *
 * @param shortname shortname of the resource to create, must not be null
 * @param dwca      location of the Darwin Core Archive to import
 * @param creator   user passed on to {@code create} as the creator of the resource
 * @param alog      action logger that receives error, warning and success messages
 * @return the newly created and saved resource
 * @throws AlreadyExistingException if {@code get(shortname)} already returns a resource
 * @throws ImportException          if the archive has no core, the core has no rowType, a required
 *                                  id/coreId column is missing, or reading the archive fails
 * @throws InvalidFilenameException declared because the source import may raise it
 */
private Resource createFromArchive(String shortname, File dwca, User creator, ActionLogger alog) throws AlreadyExistingException, ImportException, InvalidFilenameException {
    Objects.requireNonNull(shortname);
    // refuse to create a second resource under a shortname that is already taken
    if (get(shortname) != null) {
        throw new AlreadyExistingException();
    }
    Resource resource;
    try {
        // open the archive and validate its core before anything is created
        Archive archive = DwcFiles.fromLocation(dwca.toPath());
        ArchiveFile core = archive.getCore();
        if (core == null) {
            alog.error("manage.resource.create.core.invalid");
            throw new ImportException("Darwin Core Archive is invalid and does not have a core mapping");
        }
        Term coreRowType = core.getRowType();
        if (coreRowType == null) {
            alog.error("manage.resource.create.core.invalid.rowType");
            throw new ImportException("Darwin Core Archive is invalid, core mapping has no rowType");
        }
        // derive the resource's core type from the rowType; anything unrecognised becomes OTHER
        CoreRowType resourceType = CoreRowType.OTHER;
        if (coreRowType.equals(DwcTerm.Taxon)) {
            resourceType = CoreRowType.CHECKLIST;
        } else if (coreRowType.equals(DwcTerm.Occurrence)) {
            resourceType = CoreRowType.OCCURRENCE;
        } else if (coreRowType.equals(DwcTerm.Event)) {
            resourceType = CoreRowType.SAMPLINGEVENT;
        }
        String resourceTypeName = resourceType.toString().toUpperCase(Locale.ENGLISH);
        resource = create(shortname, resourceTypeName, creator);

        // sources already imported, keyed by location, because an archive may reference
        // the same source file from more than one mapping
        Map<String, TextFileSource> importedSources = new HashMap<>();

        // core source and mapping
        TextFileSource coreSource = importSource(resource, core);
        importedSources.put(core.getLocation(), coreSource);
        ExtensionMapping coreMapping = importMappings(alog, core, coreSource);
        resource.addMapping(coreMapping);

        // an id column in the core is only required once there are extensions pointing at it
        if (!archive.getExtensions().isEmpty() && coreMapping.getIdColumn() == null) {
            alog.error("manage.resource.create.core.invalid.id");
            throw new ImportException("Darwin Core Archive is invalid, core mapping has no id element");
        }

        // extension sources and mappings (no iterations when the archive has no extensions)
        for (ArchiveFile ext : archive.getExtensions()) {
            String location = ext.getLocation();
            TextFileSource extSource;
            if (sources_contains(importedSources, location)) {
                extSource = importedSources.get(location);
                LOG.debug("SourceBase " + extSource.getName() + " shared by multiple extensions");
            } else {
                extSource = importSource(resource, ext);
                importedSources.put(location, extSource);
            }
            ExtensionMapping extMapping = importMappings(alog, ext, extSource);
            if (extMapping.getIdColumn() == null) {
                alog.error("manage.resource.create.core.invalid.coreid");
                throw new ImportException("Darwin Core Archive is invalid, extension mapping has no coreId element");
            }
            // make sure the extension carries a coreId term mapping with the correct coreId index
            if (resource.getCoreRowType() != null) {
                updateExtensionCoreIdMapping(extMapping, resource.getCoreRowType());
            }
            resource.addMapping(extMapping);
        }

        // metadata is optional: only set it when it could be read
        Eml eml = readMetadata(resource.getShortname(), archive, alog);
        if (eml != null) {
            resource.setEml(eml);
        }

        // persist everything and report core type, number of sources and number of mappings
        save(resource);
        String[] summary = new String[] {
            StringUtils.trimToEmpty(resource.getCoreRowType()),
            String.valueOf(resource.getSources().size()),
            String.valueOf(resource.getMappings().size())
        };
        alog.info("manage.resource.create.success", summary);
    } catch (UnsupportedArchiveException | InvalidConfigException | IOException e) {
        alog.warn(e.getMessage(), e);
        throw new ImportException(e);
    }
    return resource;
}

/** Tells whether a source has already been imported for the given archive file location. */
private static boolean sources_contains(Map<String, TextFileSource> importedSources, String location) {
    return importedSources.containsKey(location);
}
Also used : Archive(org.gbif.dwc.Archive) HashMap(java.util.HashMap) ArrayListValuedHashMap(org.apache.commons.collections4.multimap.ArrayListValuedHashMap) Eml(org.gbif.metadata.eml.Eml) AlreadyExistingException(org.gbif.ipt.service.AlreadyExistingException) Resource(org.gbif.ipt.model.Resource) TextFileSource(org.gbif.ipt.model.TextFileSource) Term(org.gbif.dwc.terms.Term) DwcTerm(org.gbif.dwc.terms.DwcTerm) InvalidConfigException(org.gbif.ipt.service.InvalidConfigException) IOException(java.io.IOException) UnsupportedArchiveException(org.gbif.dwc.UnsupportedArchiveException) ArchiveFile(org.gbif.dwc.ArchiveFile) ImportException(org.gbif.ipt.service.ImportException) ExtensionMapping(org.gbif.ipt.model.ExtensionMapping) CoreRowType(org.gbif.ipt.model.Resource.CoreRowType)

Example 2 with TextFileSource

use of org.gbif.ipt.model.TextFileSource in project ipt by gbif.

the class ResourceManagerImpl method importSource.

/**
 * Adds the data file behind an archive file to the given resource as a text file source,
 * copies the archive file's properties onto it and corrects its row count for the number
 * of header lines it declares.
 *
 * @param config resource the new source is added to
 * @param af     archive file whose location file becomes the source
 * @return the added source with its row count adjusted
 * @throws ImportException          declared because adding the source may raise it
 * @throws InvalidFilenameException declared because adding the source may raise it
 */
private TextFileSource importSource(Resource config, ArchiveFile af) throws ImportException, InvalidFilenameException {
    TextFileSource source = (TextFileSource) sourceManager.add(config, af.getLocationFile(), af.getLocation());
    SourceManagerImpl.copyArchiveFileProperties(af, source);

    // the exact number of header rows is known only now, so the row count is corrected here
    final int headerLines = source.getIgnoreHeaderLines();
    final int rowsBefore = source.getRows();
    final int rowsAfter = rowsBefore + 1 - headerLines;
    if (headerLines != 1) {
        // with exactly one header line the count does not change, so only other cases are logged
        LOG.info("Adjusting row count to " + rowsAfter + " from " + rowsBefore + " since header count is declared as " + headerLines);
    }
    source.setRows(rowsAfter);
    return source;
}
Also used : TextFileSource(org.gbif.ipt.model.TextFileSource) ArchiveFile(org.gbif.dwc.ArchiveFile) File(java.io.File)

Example 3 with TextFileSource

use of org.gbif.ipt.model.TextFileSource in project ipt by gbif.

the class SourceManagerImpl method addTextFile.

/**
 * Creates a text file source for the given file and, when the dwca reader can open the file,
 * copies the properties of the resulting archive core onto that source.
 *
 * @param file file to analyze
 * @return the new source; its properties are left untouched when the dwca reader does not
 *         support the file
 * @throws ImportException if the file's delimiters cannot be determined or reading it fails
 */
private TextFileSource addTextFile(File file) throws ImportException {
    TextFileSource source = new TextFileSource();
    try {
        // analyze the individual file with the dwca reader and take over what it reports for the core
        copyArchiveFileProperties(DwcFiles.fromLocation(file.toPath()).getCore(), source);
    } catch (UnknownDelimitersException invalidFile) {
        // the file is invalid
        LOG.warn(invalidFile.getMessage());
        throw new ImportException(invalidFile);
    } catch (IOException unreadable) {
        LOG.warn(unreadable.getMessage());
        throw new ImportException(unreadable);
    } catch (UnsupportedArchiveException unsupported) {
        // deliberately not rethrown: the dwca library cannot read the file,
        // but it may still be a valid file that is set up manually
        LOG.warn(unsupported.getMessage());
    }
    return source;
}
Also used : ImportException(org.gbif.ipt.service.ImportException) Archive(org.gbif.dwc.Archive) UnknownDelimitersException(org.gbif.utils.file.csv.UnknownDelimitersException) TextFileSource(org.gbif.ipt.model.TextFileSource) IOException(java.io.IOException) UnsupportedArchiveException(org.gbif.dwc.UnsupportedArchiveException)

Example 4 with TextFileSource

use of org.gbif.ipt.model.TextFileSource in project ipt by gbif.

the class SourceManagerImpl method delete.

/**
 * {@inheritDoc}
 *
 * <p>Returns {@code false} without touching the resource when {@code source} is null. Otherwise the
 * source is removed from the resource and {@code true} is returned. The data file of a text file
 * source is always deleted; the data file of an Excel source is deleted only when no other Excel
 * source of the resource points at the same file.
 */
@Override
public boolean delete(Resource resource, Source source) {
    if (source == null) {
        return false;
    }
    resource.deleteSource(source);

    if (source instanceof TextFileSource) {
        // the source data file goes together with the source
        ((TextFileSource) source).getFile().delete();
    }

    if (source instanceof ExcelFileSource) {
        ExcelFileSource excel = (ExcelFileSource) source;
        // look for any other Excel source that still relies on the same file
        boolean fileStillInUse = false;
        for (Source other : resource.getSources()) {
            if (other.equals(excel)) {
                continue;
            }
            if (!other.isExcelSource()) {
                continue;
            }
            if (((ExcelFileSource) other).getFile().equals(excel.getFile())) {
                // another Excel source uses the same file, so it must be kept
                fileStillInUse = true;
                break;
            }
        }
        if (!fileStillInUse) {
            excel.getFile().delete();
        }
    }
    return true;
}
Also used : ExcelFileSource(org.gbif.ipt.model.ExcelFileSource) TextFileSource(org.gbif.ipt.model.TextFileSource) UrlSource(org.gbif.ipt.model.UrlSource) SqlSource(org.gbif.ipt.model.SqlSource) FileSource(org.gbif.ipt.model.FileSource) Source(org.gbif.ipt.model.Source)

Example 5 with TextFileSource

use of org.gbif.ipt.model.TextFileSource in project ipt by gbif.

the class SourceActionTest method testAlertColumnNumberChange.

/**
 * Checks when {@code alertColumnNumberChange} reports an alert for a source named "images":
 * never when the flag is false, and with the flag true only when the two numbers differ.
 */
@Test
public void testAlertColumnNumberChange() {
    Source imagesSource = new TextFileSource();
    imagesSource.setName("images");
    action.setSource(imagesSource);

    // flag false: no alert even though the numbers differ
    assertFalse(action.alertColumnNumberChange(false, 1, 2));

    // flag true, equal numbers: no alert
    assertFalse(action.alertColumnNumberChange(true, 0, 0));
    assertFalse(action.alertColumnNumberChange(true, 10, 10));

    // flag true, differing numbers: alert in both directions
    assertTrue(action.alertColumnNumberChange(true, 1, 2));
    assertTrue(action.alertColumnNumberChange(true, 2, 1));
}
Also used : TextFileSource(org.gbif.ipt.model.TextFileSource) Source(org.gbif.ipt.model.Source) Test(org.junit.jupiter.api.Test)

Aggregations

TextFileSource (org.gbif.ipt.model.TextFileSource)15 File (java.io.File)10 Test (org.junit.jupiter.api.Test)8 Resource (org.gbif.ipt.model.Resource)7 ResourceManager (org.gbif.ipt.service.manage.ResourceManager)7 ExtensionMapping (org.gbif.ipt.model.ExtensionMapping)5 PropertyMapping (org.gbif.ipt.model.PropertyMapping)4 ArchiveFile (org.gbif.dwc.ArchiveFile)3 FileSource (org.gbif.ipt.model.FileSource)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 TreeSet (java.util.TreeSet)2 Archive (org.gbif.dwc.Archive)2 UnsupportedArchiveException (org.gbif.dwc.UnsupportedArchiveException)2 Extension (org.gbif.ipt.model.Extension)2 Source (org.gbif.ipt.model.Source)2 SqlSource (org.gbif.ipt.model.SqlSource)2 ImportException (org.gbif.ipt.service.ImportException)2 InvalidConfigException (org.gbif.ipt.service.InvalidConfigException)2 BeforeEach (org.junit.jupiter.api.BeforeEach)2