Search in sources :

Example 6 with DataTable

use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.

the class CSVFileReaderTest method testVariableUNFs.

/*
 * UNF test: ingests a CSV file, then recalculates the UNF signature of every
 * column of the generated tab-delimited file and compares it to a known value.
 * I'd like to use a file with more interesting values - "special" numbers, freaky dates, accents, etc.
 * for this. But checking it in with this simple file, for now.
 * (thinking about it, the "csv file from hell" may be a better test case for the UNF test)
 */
@Test
public void testVariableUNFs() {
    String testFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/election_precincts.csv";
    Long expectedNumberOfVariables = 13L;
    // aka the number of lines in the TAB file produced by the ingest plugin
    Long expectedNumberOfCases = 24L;
    // One expected UNF per variable, in column order.
    String[] expectedUNFs = {
        "UNF:6:wb7OATtNC/leh1sOP5IGDQ==",
        "UNF:6:0V3xQ3ea56rzKwvGt9KBCA==",
        "UNF:6:0V3xQ3ea56rzKwvGt9KBCA==",
        "UNF:6:H9inAvq5eiIHW6lpqjjKhQ==",
        "UNF:6:Bh0M6QvunZwW1VoTyioRCQ==",
        "UNF:6:o5VTaEYz+0Kudf6hQEEupQ==",
        "UNF:6:eJRvbDJkIeDPrfN2dYpRfA==",
        "UNF:6:JD1wrtM12E7evrJJ3bRFGA==",
        "UNF:6:xUKbK9hb5o0nL5/mYiy7Bw==",
        "UNF:6:Mvq3BrdzoNhjndMiVr92Ww==",
        "UNF:6:KkHM6Qlyv3QlUd+BKqqB3Q==",
        "UNF:6:EWUVuyXKSpyllsrjHnheig==",
        "UNF:6:ri9JsRJxM2xpWSIq17oWNw=="
    };
    TabularDataIngest ingestResult = null;
    File generatedTabFile = null;
    DataTable generatedDataTable = null;
    try (BufferedInputStream stream = new BufferedInputStream(new FileInputStream(testFile))) {
        CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi());
        ingestResult = instance.read(stream, null);
        generatedTabFile = ingestResult.getTabDelimitedFile();
        generatedDataTable = ingestResult.getDataTable();
    } catch (IOException ex) {
        fail("" + ex);
    }
    assertNotNull(generatedDataTable);
    assertNotNull(generatedDataTable.getDataVariables());
    // The table's declared variable count must agree with the variable list it carries.
    assertEquals(generatedDataTable.getVarQuantity(), Long.valueOf(generatedDataTable.getDataVariables().size()));
    // JUnit convention is (expected, actual); keeping that order makes failure messages read correctly.
    assertEquals(expectedNumberOfVariables, generatedDataTable.getVarQuantity());
    assertEquals(expectedNumberOfCases, generatedDataTable.getCaseQuantity());

    int variableCount = expectedNumberOfVariables.intValue();
    int caseCount = generatedDataTable.getCaseQuantity().intValue();

    for (int i = 0; i < variableCount; i++) {
        String unf = null;

        // Continuous variables: UNF over the column read as Double values.
        if (generatedDataTable.getDataVariables().get(i).isIntervalContinuous()) {
            // try-with-resources guarantees the tab file handle is released for every column.
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, caseCount);
                unf = UNFUtil.calculateUNF(columnVector);
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException ioex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", floating point): " + ioex);
            }
        }

        // Discrete numeric variables: UNF over the column read as Long values.
        if (generatedDataTable.getDataVariables().get(i).isIntervalDiscrete() && generatedDataTable.getDataVariables().get(i).isTypeNumeric()) {
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, caseCount);
                unf = UNFUtil.calculateUNF(columnVector);
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException ioex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", integer): " + ioex);
            }
        }

        // Character variables: UNF over the column read as Strings, with
        // per-value format hints when the variable encodes dates or times.
        if (generatedDataTable.getDataVariables().get(i).isTypeCharacter()) {
            String[] dateFormats = null;
            // Special handling for Character strings that encode dates and times:
            if ("time".equals(generatedDataTable.getDataVariables().get(i).getFormatCategory()) || "date".equals(generatedDataTable.getDataVariables().get(i).getFormatCategory())) {
                // One format entry per case; sized from the table's own case count
                // (already asserted equal to the expected count above).
                dateFormats = new String[caseCount];
                for (int j = 0; j < caseCount; j++) {
                    dateFormats[j] = generatedDataTable.getDataVariables().get(i).getFormat();
                }
            }
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, caseCount);
                if (dateFormats == null) {
                    unf = UNFUtil.calculateUNF(columnVector);
                } else {
                    unf = UNFUtil.calculateUNF(columnVector, dateFormats);
                }
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException iex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", " + (dateFormats == null ? "String" : "Date/Time value") + "): " + iex);
            }
        }

        // A variable matching none of the branches above leaves unf null and fails here.
        assertEquals("Variable number " + i + ":", expectedUNFs[i], unf);
    }
}
Also used : DataTable(edu.harvard.iq.dataverse.DataTable) UnfException(org.dataverse.unf.UnfException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) TabularDataIngest(edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest) File(java.io.File) Test(org.junit.Test)

Example 7 with DataTable

use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.

the class ExternalToolServiceBeanTest method testfindAll.

/**
 * Verifies that a single EXPLORE-type external tool is reported as available
 * for a data file that carries one data table.
 */
@Test
public void testfindAll() {
    DataFile dataFile = new DataFile();
    // Upper-case suffix: a lower-case 'l' is easily misread as the digit 1.
    dataFile.setId(42L);
    List<DataTable> dataTables = new ArrayList<>();
    dataTables.add(new DataTable());
    dataFile.setDataTables(dataTables);
    ApiToken apiToken = new ApiToken();
    apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7");
    ExternalTool.Type type = ExternalTool.Type.EXPLORE;
    ExternalTool externalTool = new ExternalTool("displayName", "description", type, "http://foo.com", "{}");
    // NOTE(review): the handler is never read afterwards; it is kept because its
    // constructor may perform validation this test relies on - confirm before removing.
    ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken);
    List<ExternalTool> externalTools = new ArrayList<>();
    externalTools.add(externalTool);
    List<ExternalTool> availableExternalTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile);
    // JUnit convention is (expected, actual).
    assertEquals(1, availableExternalTools.size());
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) DataTable(edu.harvard.iq.dataverse.DataTable) ArrayList(java.util.ArrayList) ApiToken(edu.harvard.iq.dataverse.authorization.users.ApiToken) Test(org.junit.Test)

Example 8 with DataTable

use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.

the class DataTableImportDDI method processLocation.

/**
 * Reads the attributes of the element the reader is currently positioned on
 * ("StartPos", "EndPos", "RecSegNo", "fileid") and applies them to the variable.
 * When the variable has no data table yet and a non-empty "fileid" is present,
 * the variable is attached to the data table registered for that file id
 * (creating and registering one if needed) and given the next file order.
 *
 * @param xmlr           reader positioned on the element whose attributes are read
 * @param dv             variable to populate
 * @param dataTablesMap  data tables keyed by file id; a new entry is added for an unseen id
 * @param varsPerFileMap next file-order value per file id; updated on each assignment
 * @throws XMLStreamException if the variable already has a data table assigned
 */
private void processLocation(XMLStreamReader xmlr, DataVariable dv, Map<String, DataTable> dataTablesMap, Map<String, Integer> varsPerFileMap) throws XMLStreamException {
    // if these fields don't convert to Long, just leave blank
    // (Long.valueOf throws NumberFormatException for a missing/null attribute as well)
    try {
        dv.setFileStartPosition(Long.valueOf(xmlr.getAttributeValue(null, "StartPos")));
    } catch (NumberFormatException ignored) {
        // deliberately left unset
    }
    try {
        dv.setFileEndPosition(Long.valueOf(xmlr.getAttributeValue(null, "EndPos")));
    } catch (NumberFormatException ignored) {
        // deliberately left unset
    }
    try {
        dv.setRecordSegmentNumber(Long.valueOf(xmlr.getAttributeValue(null, "RecSegNo")));
    } catch (NumberFormatException ignored) {
        // deliberately left unset
    }

    if (dv.getDataTable() == null) {
        String fileId = xmlr.getAttributeValue(null, "fileid");
        if (fileId != null && !fileId.isEmpty()) {
            DataTable datatable = dataTablesMap.get(fileId);
            if (datatable == null) {
                // First variable seen for this file: register a fresh table and start its counter.
                datatable = new DataTable();
                dataTablesMap.put(fileId, datatable);
                varsPerFileMap.put(fileId, 0);
            }
            dv.setDataTable(datatable);
            if (datatable.getDataVariables() == null) {
                datatable.setDataVariables(new ArrayList<>());
            }
            datatable.getDataVariables().add(dv);
            // The variable takes the current counter value; the counter then advances by one.
            int filePosition = varsPerFileMap.get(fileId);
            dv.setFileOrder(filePosition);
            varsPerFileMap.put(fileId, filePosition + 1);
        }
    } else {
        // NOTE(review): this branch runs when the variable already has a data table,
        // which does not match the message text (it reads as if meant for a missing
        // "fileid"). Behavior is left as-is; confirm the intended condition.
        throw new XMLStreamException("Empty or NULL location attribute in a variable section.");
    }
}
Also used : DataTable(edu.harvard.iq.dataverse.DataTable) XMLStreamException(javax.xml.stream.XMLStreamException)

Example 9 with DataTable

use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.

the class DDIExportServiceBean method createDataFileDDI.

private void createDataFileDDI(XMLStreamWriter xmlw, Set<String> excludedFieldSet, Set<String> includedFieldSet, DataFile df) throws XMLStreamException {
    /*
     * Writes a complete DDI document for one data file: the study
     * description, then (subject to the "include"/"exclude" field sets)
     * the <fileDscr> section and the <var> entries inside <dataDscr>.
     *
     * This method is only called when an /api/meta/file request comes
     * in; for a study export the file and variable sections are produced
     * separately. That is why the top-level <codeBook> element is opened
     * (and closed) here.
     */
    xmlw.writeStartElement("codeBook");
    xmlw.writeDefaultNamespace("http://www.icpsr.umich.edu/DDI");
    writeAttribute(xmlw, "version", "2.0");

    createStdyDscr(xmlw, excludedFieldSet, includedFieldSet, df.getOwner().getLatestVersion());

    final DataTable dataTable = fileService.findDataTableByFileId(df.getId());

    // File-level section, only if requested.
    if (checkField("fileDscr", excludedFieldSet, includedFieldSet)) {
        createFileDscr(xmlw, excludedFieldSet, null, df, dataTable);
    }

    // Variable-level section: <dataDscr> is always written, its <var> children only if requested.
    xmlw.writeStartElement("dataDscr");
    if (checkField("var", excludedFieldSet, includedFieldSet)) {
        for (DataVariable variable : variableService.findByDataTableId(dataTable.getId())) {
            createVarDDI(xmlw, excludedFieldSet, null, variable);
        }
    }
    xmlw.writeEndElement(); // dataDscr

    xmlw.writeEndElement(); // codeBook
}
Also used : DataTable(edu.harvard.iq.dataverse.DataTable) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable)

Example 10 with DataTable

use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.

the class DDIExportServiceBean method createDatasetDDI.

/*
 * Writes a complete DDI <codeBook> document for a dataset version: the study
 * description, then - when the version has files - the file descriptions and
 * variable descriptions of its tabular files (both gated on "fileDscr") and
 * the "other material" entries for the remaining files (gated on "othrMat").
 */
private void createDatasetDDI(XMLStreamWriter xmlw, Set<String> excludedFieldSet, Set<String> includedFieldSet, DatasetVersion version) throws XMLStreamException {
    xmlw.writeStartElement("codeBook");
    xmlw.writeDefaultNamespace("http://www.icpsr.umich.edu/DDI");
    writeAttribute(xmlw, "version", "2.0");
    createStdyDscr(xmlw, excludedFieldSet, includedFieldSet, version);

    List<FileMetadata> allFiles = version.getFileMetadatas();
    if (allFiles != null && !allFiles.isEmpty()) {
        // Split the files into tabular and non-tabular groups, preserving order.
        List<FileMetadata> tabular = new ArrayList<>();
        List<FileMetadata> nonTabular = new ArrayList<>();
        for (FileMetadata candidate : allFiles) {
            List<FileMetadata> target = candidate.getDataFile().isTabularData() ? tabular : nonTabular;
            target.add(candidate);
        }

        if (checkField("fileDscr", excludedFieldSet, includedFieldSet)) {
            // 1st pass: one file description per tabular file.
            for (FileMetadata tabularFile : tabular) {
                DataTable table = fileService.findDataTableByFileId(tabularFile.getDataFile().getId());
                createFileDscr(xmlw, excludedFieldSet, includedFieldSet, tabularFile.getDataFile(), table);
            }

            // 2nd pass: the data (variable) description section.
            xmlw.writeStartElement("dataDscr");
            for (FileMetadata tabularFile : tabular) {
                DataTable table = fileService.findDataTableByFileId(tabularFile.getDataFile().getId());
                for (DataVariable variable : variableService.findByDataTableId(table.getId())) {
                    createVarDDI(xmlw, excludedFieldSet, null, variable);
                }
            }
            xmlw.writeEndElement(); // dataDscr
        }

        if (checkField("othrMat", excludedFieldSet, includedFieldSet)) {
            for (FileMetadata otherFile : nonTabular) {
                createOtherMat(xmlw, excludedFieldSet, includedFieldSet, otherFile);
            }
        }
    }

    xmlw.writeEndElement(); // codeBook
}
Also used : DataTable(edu.harvard.iq.dataverse.DataTable) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) ArrayList(java.util.ArrayList) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable)

Aggregations

DataTable (edu.harvard.iq.dataverse.DataTable)16 TabularDataIngest (edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest)7 DataFile (edu.harvard.iq.dataverse.DataFile)6 IOException (java.io.IOException)6 Test (org.junit.Test)6 FileMetadata (edu.harvard.iq.dataverse.FileMetadata)5 BufferedInputStream (java.io.BufferedInputStream)5 File (java.io.File)5 FileInputStream (java.io.FileInputStream)5 FileNotFoundException (java.io.FileNotFoundException)4 ArrayList (java.util.ArrayList)4 TabularDataFileReader (edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader)3 Dataset (edu.harvard.iq.dataverse.Dataset)2 DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion)2 DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable)2 MocksFactory.makeDataset (edu.harvard.iq.dataverse.mocks.MocksFactory.makeDataset)2 ApiToken (edu.harvard.iq.dataverse.authorization.users.ApiToken)1 VariableInterval (edu.harvard.iq.dataverse.datavariable.DataVariable.VariableInterval)1 VariableType (edu.harvard.iq.dataverse.datavariable.DataVariable.VariableType)1 BufferedReader (java.io.BufferedReader)1