Search in sources :

Example 1 with VariableCategory

use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.

the class DataConverter method getValueTableForRequestedVariables.

/**
 * Builds, for every requested variable that has labelled categories, a map of
 * category value to category label.
 *
 * <p>Categories whose label is {@code null} are skipped, and variables that end
 * up with no labelled category are left out of the result. Result keys are the
 * variable id prefixed with {@code "v"}; the outer map keeps the iteration
 * order of {@code dvs}.
 *
 * @param dvs the variables to inspect
 * @return value/label tables keyed by {@code "v" + id}; possibly empty, never {@code null}
 */
private static Map<String, Map<String, String>> getValueTableForRequestedVariables(List<DataVariable> dvs) {
    Map<String, Map<String, String>> tablesByVariable = new LinkedHashMap<>();
    for (DataVariable variable : dvs) {
        // Iterate over a snapshot of the variable's categories.
        List<VariableCategory> categories = new ArrayList<>(variable.getCategories());
        Map<String, String> labelsByValue = new HashMap<>();
        for (VariableCategory category : categories) {
            if (category.getLabel() == null) {
                // unlabelled categories carry no information for this table
                continue;
            }
            labelsByValue.put(category.getValue(), category.getLabel());
        }
        if (!labelsByValue.isEmpty()) {
            tablesByVariable.put("v" + variable.getId(), labelsByValue);
        }
    }
    return tablesByVariable;
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 2 with VariableCategory

use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.

the class DTA117FileReader method readVariableTypes.

/**
 * Reads the {@code <variable_types>...</variable_types>} section, which holds
 * one 2-byte type code per variable, and creates a {@link DataVariable} for
 * each code.
 *
 * <p>Every new variable starts with empty invalid-range, summary-statistic and
 * category lists and a placeholder UNF. The type name returned by
 * {@code configureVariableType} is stored in {@code variableTypes}, and the
 * finished list is installed on {@code dataTable}.
 * (TODO: document the individual type codes.)
 *
 * @param reader the reader the section is consumed from
 * @throws IOException declared for failures of the underlying reader calls
 */
private void readVariableTypes(DataReader reader) throws IOException {
    // TODO:
    // check that we are at the right byte offset!
    logger.fine("Type section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_types());
    reader.readOpeningTag(TAG_VARIABLE_TYPES);

    final int variableCount = dataTable.getVarQuantity().intValue();
    // setup variableTypeList
    variableTypes = new String[variableCount];
    List<DataVariable> parsedVariables = new ArrayList<>();

    for (int index = 0; index < variableCount; index++) {
        int typeCode = reader.readShortInteger();
        logger.fine("variable " + index + ": type=" + typeCode);

        DataVariable variable = new DataVariable();
        variable.setInvalidRanges(new ArrayList<VariableRange>());
        variable.setSummaryStatistics(new ArrayList<SummaryStatistic>());
        variable.setCategories(new ArrayList<VariableCategory>());
        variable.setUnf("UNF:pending");
        variable.setFileOrder(index);
        variable.setDataTable(dataTable);

        variableTypes[index] = configureVariableType(variable, typeCode);
        // TODO:
        // we could also calculate the byte offset table now, rather
        // than figure it out later... - ?
        parsedVariables.add(variable);
    }

    reader.readClosingTag(TAG_VARIABLE_TYPES);
    dataTable.setDataVariables(parsedVariables);
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory) VariableRange(edu.harvard.iq.dataverse.datavariable.VariableRange) SummaryStatistic(edu.harvard.iq.dataverse.datavariable.SummaryStatistic) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable)

Example 3 with VariableCategory

use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.

the class DTAFileReader method parseValueLabelsReleasel108.

/**
 * Reads the value-label tables from {@code stream} and turns them into
 * {@link VariableCategory} entries on the variables of {@code dataTable}.
 *
 * <p>At most {@code nvar} tables are read; reading stops early once
 * {@code stream.available()} reports 0 after a table. Each table is first stored
 * in a temporary map keyed by its label name, because one table may be shared
 * by several variables. Afterwards every variable whose entry in
 * {@code valueLabelsLookupTable} names a table that was read receives one
 * category per value/label pair.
 *
 * @param stream the input to read from; expected to be positioned at the first
 *               value-label table header
 * @throws IllegalArgumentException if {@code stream} is {@code null}
 * @throws IOException if a header or a table cannot be read completely
 */
private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOException {
    dbgLog.fine("parseValueLabelsRelease108(): start");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    int nvar = dataTable.getVarQuantity().intValue();
    int length_label_name = constantTable.get("NAME");
    int length_value_label_header = value_label_table_length + length_label_name + VALUE_LABEL_HEADER_PADDING_LENGTH;
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }
    /*
         Seg  field         byte    type
         1-1. len_vlt(Seg.2)   4    int
         1-2. vlt_name      9/33    char+(\0) == name used in Sec2.part 5
         1-3. padding          3    byte
         -----------------------------------
         16/40
         2-1. n(# of vls)      4    int
         2-2. m(len_labels)    4    int
         2-3. label_offsets    4*n  int[]
         2-4. values           4*n  int[]
         2-5. labels           m    char
         */
    // This map will hold a temporary lookup table for all the categorical
    // value-label groups:
    // These groups have unique names, and a group *may be shared* between
    // multiple variables. In the method decodeDescriptorValueLabel above
    // we have populated a lookup table where variables are linked to the
    // corresponding value-label groups by name. Thus we must fully populate
    // the full map of all the variable groups, then go through the list
    // of variables and create the dataverse variable categories from
    // them. -- L.A. 4.0
    Map<String, Map<String, String>> tempValueLabelTable = new LinkedHashMap<>();
    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }
        byte[] valueLabelHeader = new byte[length_value_label_header];
        // Part 1: reading the header of a value-label table if exists.
        // A single read() may return fewer bytes than requested and returns
        // -1 (never 0) at end of stream, so keep reading until the header
        // is complete and reject a truncated one.
        int nbytes = readValueLabelBytes(stream, valueLabelHeader, length_value_label_header);
        if (nbytes == 0) {
            throw new IOException("reading value label header: no datum");
        }
        if (nbytes < length_value_label_header) {
            throw new IOException("reading value label header: expected " + length_value_label_header
                    + " bytes but the stream ended after " + nbytes);
        }
        // 1.1 length_value_label_table
        ByteBuffer bb_value_label_header = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_header.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_value_label_table = bb_value_label_header.getInt();
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length of this value-label table=" + length_value_label_table);
        }
        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length, (value_label_table_length + length_label_name)), "ISO-8859-1");
        String labelName = getNullStrippedString(rawLabelName);
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
            dbgLog.fine(i + "-th value-label table");
        }
        // Part 2: reading the value-label table (same short-read/EOF
        // handling as for the header above)
        byte[] valueLabelTable_i = new byte[length_value_label_table];
        int noBytes = readValueLabelBytes(stream, valueLabelTable_i, length_value_label_table);
        if (noBytes == 0) {
            throw new IOException("reading value label table: no datum");
        }
        if (noBytes < length_value_label_table) {
            throw new IOException("reading value label table: expected " + length_value_label_table
                    + " bytes but the stream ended after " + noBytes);
        }
        // 2-1. 4-byte-integer: number of units in this table (n)
        int valueLabelTable_offset = 0;
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelTable_i, valueLabelTable_offset, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
        }
        int no_value_label_pairs = bb_value_label_pairs.getInt();
        valueLabelTable_offset += value_label_table_length;
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }
        // 2-2. 4-byte-integer: length of the label section (m bytes)
        ByteBuffer bb_length_label_segment = ByteBuffer.wrap(valueLabelTable_i, valueLabelTable_offset, value_label_table_length);
        if (isLittleEndian) {
            bb_length_label_segment.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_label_segment = bb_length_label_segment.getInt();
        valueLabelTable_offset += value_label_table_length;
        // 2-3. 4-byte-integer array (4xn): offset values for the label sec.
        // these "label offsets" actually appear to represent the byte
        // offsets of the label strings, as stored in the next section.
        // They are used further below (section 2-5) to cut the individual
        // labels out of the label segment. -- L.A.
        int[] label_offsets = new int[no_value_label_pairs];
        int byte_offset = valueLabelTable_offset;
        for (int j = 0; j < no_value_label_pairs; j++) {
            // note: 4-byte signed, not java's long
            ByteBuffer bb_label_offset = ByteBuffer.wrap(valueLabelTable_i, byte_offset, value_label_table_length);
            if (isLittleEndian) {
                bb_label_offset.order(ByteOrder.LITTLE_ENDIAN);
                dbgLog.fine("label offset: byte reversed");
            }
            label_offsets[j] = bb_label_offset.getInt();
            dbgLog.fine("label offset [" + j + "]: " + label_offsets[j]);
            byte_offset += value_label_table_length;
        }
        // 2-4. 4-byte-integer array (4xn): value array (sorted)
        dbgLog.fine("value array");
        int[] valueList = new int[no_value_label_pairs];
        int offset_value = byte_offset;
        for (int k = 0; k < no_value_label_pairs; k++) {
            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value, value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getInt();
            offset_value += value_label_table_length;
        }
        // 2-5. m-byte chars that store label data (m units of labels)
        String label_segment = new String(Arrays.copyOfRange(valueLabelTable_i, offset_value, (length_label_segment + offset_value)), "ISO-8859-1");
        // L.A. -- 2011.2.25:
        // Splitting the segment on '\0' would assume that the labels are
        // stored in the same order as the values. Instead, we use the
        // offset values obtained in the section 2.3 above, and select the
        // corresponding substrings:
        String[] labelList = new String[no_value_label_pairs];
        for (int l = 0; l < no_value_label_pairs; l++) {
            String lblString = label_segment.substring(label_offsets[l]);
            int nullIndx = lblString.indexOf('\000');
            if (nullIndx > -1) {
                lblString = lblString.substring(0, nullIndx);
            }
            labelList[l] = lblString;
        }
        // this should work! -- L.A.
        // (TODO: check the v105 value label parsing method, to see if
        // something similar applies there)
        // Finally, we've reached the actual value-label pairs. We'll go
        // through them and put them on the temporary lookup map:
        Map<String, String> valueLabelPairs = new LinkedHashMap<>();
        tempValueLabelTable.put(labelName, valueLabelPairs);
        for (int l = 0; l < no_value_label_pairs; l++) {
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine(l + "-th pair:" + valueList[l] + "[" + labelList[l] + "]");
            }
            // TODO: do we need any null/empty string checks here? -- L.A. 4.0
            valueLabelPairs.put(Integer.toString(valueList[l]), labelList[l]);
        }
        if (stream.available() == 0) {
            // reached the end of the file
            dbgLog.fine("reached the end of the file at " + i + "th value-label Table");
            break;
        }
    }
    // Second pass: create the categories for every variable that is linked
    // to one of the value-label groups read above. -- L.A. 4.0
    for (int i = 0; i < nvar; i++) {
        if (valueLabelsLookupTable[i] == null) {
            continue;
        }
        Map<String, String> labelsForVariable = tempValueLabelTable.get(valueLabelsLookupTable[i]);
        if (labelsForVariable == null) {
            continue;
        }
        for (Map.Entry<String, String> pair : labelsForVariable.entrySet()) {
            VariableCategory cat = new VariableCategory();
            cat.setValue(pair.getKey());
            cat.setLabel(pair.getValue());
            /* cross-link the variable and category to each other: */
            cat.setDataVariable(dataTable.getDataVariables().get(i));
            dataTable.getDataVariables().get(i).getCategories().add(cat);
        }
    }
    dbgLog.fine("parseValueLabelsRelease108(): end");
}

/**
 * Fills {@code buffer[0..length)} from {@code stream}, repeating the read
 * until {@code length} bytes have arrived or the end of the stream is reached.
 *
 * @return the number of bytes actually stored; less than {@code length} only
 *         if the stream ended first (0 if it was already at its end or
 *         {@code length} is 0)
 */
private int readValueLabelBytes(BufferedInputStream stream, byte[] buffer, int length) throws IOException {
    int total = 0;
    while (total < length) {
        int count = stream.read(buffer, total, length - total);
        if (count < 0) {
            // end of stream
            break;
        }
        total += count;
    }
    return total;
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory)

Example 4 with VariableCategory

use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.

the class PORFileReader method assignValueLabels.

/**
 * Creates the variable categories for every variable of {@code dataTable}
 * that has value labels.
 *
 * <p>For each variable, its name is translated through
 * {@code valueVariableMappingTable} into a key of {@code valueLabelTable}.
 * When that key yields a non-empty value/label map, one
 * {@link VariableCategory} per pair is created and cross-linked with the
 * variable.
 *
 * @param valueLabelTable value/label maps keyed by the mapped name
 */
void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
    final int variableCount = dataTable.getVarQuantity().intValue();
    for (int index = 0; index < variableCount; index++) {
        String variableName = dataTable.getDataVariables().get(index).getName();
        String mappedName = valueVariableMappingTable.get(variableName);
        Map<String, String> labelsByValue = valueLabelTable.get(mappedName);
        if (labelsByValue == null || labelsByValue.isEmpty()) {
            // nothing to attach to this variable
            continue;
        }
        for (Map.Entry<String, String> pair : labelsByValue.entrySet()) {
            VariableCategory category = new VariableCategory();
            category.setValue(pair.getKey());
            category.setLabel(pair.getValue());
            /* cross-link the variable and category to each other: */
            category.setDataVariable(dataTable.getDataVariables().get(index));
            dataTable.getDataVariables().get(index).getCategories().add(category);
        }
    }
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory)

Example 5 with VariableCategory

use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.

the class SAVFileReader method assignValueLabels.

/**
 * Attaches value labels to the variables of {@code dataTable} as categories.
 *
 * <p>The name of each variable is looked up in
 * {@code valueVariableMappingTable}; the result is used as the key into
 * {@code valueLabelTable}. Variables with no value/label map, or with an empty
 * one, are left untouched. Otherwise every pair becomes a
 * {@link VariableCategory} that references the variable and is added to the
 * variable's category collection.
 *
 * @param valueLabelTable value/label maps keyed by the mapped name
 */
void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
    final int total = dataTable.getVarQuantity().intValue();
    for (int position = 0; position < total; position++) {
        String name = dataTable.getDataVariables().get(position).getName();
        Map<String, String> pairs = valueLabelTable.get(valueVariableMappingTable.get(name));
        boolean hasLabels = pairs != null && !pairs.isEmpty();
        if (!hasLabels) {
            continue;
        }
        for (Map.Entry<String, String> entry : pairs.entrySet()) {
            VariableCategory labelled = new VariableCategory();
            labelled.setValue(entry.getKey());
            labelled.setLabel(entry.getValue());
            /* cross-link the variable and category to each other: */
            labelled.setDataVariable(dataTable.getDataVariables().get(position));
            dataTable.getDataVariables().get(position).getCategories().add(labelled);
        }
    }
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory)

Aggregations

VariableCategory (edu.harvard.iq.dataverse.datavariable.VariableCategory)11 SummaryStatistic (edu.harvard.iq.dataverse.datavariable.SummaryStatistic)4 VariableRange (edu.harvard.iq.dataverse.datavariable.VariableRange)4 DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable)3 XMLStreamException (javax.xml.stream.XMLStreamException)2 InvalidData (edu.harvard.iq.dataverse.ingest.tabulardata.InvalidData)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 REXPMismatchException (org.rosuda.REngine.REXPMismatchException)1 RList (org.rosuda.REngine.RList)1