use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
the class DataConverter method getValueTableForRequestedVariables.
/**
 * Collects, for every supplied variable, a map of category value to category label.
 *
 * Categories whose label is null are left out. A variable only appears in the
 * result when at least one of its categories has a label; its key is the
 * letter "v" followed by the variable id. The outer map keeps the order in
 * which the variables were supplied.
 *
 * @param dvs the variables to inspect
 * @return value/label tables keyed by "v" + variable id
 */
private static Map<String, Map<String, String>> getValueTableForRequestedVariables(List<DataVariable> dvs) {
    Map<String, Map<String, String>> valueTables = new LinkedHashMap<>();
    for (DataVariable variable : dvs) {
        // Work on a private copy of the variable's categories.
        List<VariableCategory> categories = new ArrayList<>(variable.getCategories());
        Map<String, String> labelsByValue = new HashMap<>();
        for (VariableCategory category : categories) {
            if (category.getLabel() == null) {
                continue;
            }
            labelsByValue.put(category.getValue(), category.getLabel());
        }
        if (!labelsByValue.isEmpty()) {
            valueTables.put("v" + variable.getId(), labelsByValue);
        }
    }
    return valueTables;
}
use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
the class DTA117FileReader method readVariableTypes.
/*
 * The <variable_types>...</variable_types> section carries the variable type
 * information, stored as number_of_variables * 2 byte values (one short
 * integer is read per variable below).
 * the type codes are defined as follows:
 * (TODO: ...)
 *
 * For every type code read, a fresh DataVariable is created with empty
 * invalid-range, summary-statistic and category lists, a placeholder UNF,
 * its position in the file, and a back-reference to the data table. The
 * resulting list is then installed on the data table.
 */
private void readVariableTypes(DataReader reader) throws IOException {
    // TODO:
    // check that we are at the right byte offset!
    logger.fine("Type section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_types());
    reader.readOpeningTag(TAG_VARIABLE_TYPES);

    final int variableCount = dataTable.getVarQuantity().intValue();
    // One type string per variable, filled in by configureVariableType() below.
    variableTypes = new String[variableCount];
    List<DataVariable> variables = new ArrayList<DataVariable>();

    for (int index = 0; index < variableCount; index++) {
        int typeCode = reader.readShortInteger();
        logger.fine("variable " + index + ": type=" + typeCode);

        DataVariable variable = new DataVariable();
        variable.setInvalidRanges(new ArrayList<VariableRange>());
        variable.setSummaryStatistics(new ArrayList<SummaryStatistic>());
        variable.setCategories(new ArrayList<VariableCategory>());
        variable.setUnf("UNF:pending");
        variable.setFileOrder(index);
        variable.setDataTable(dataTable);

        variableTypes[index] = configureVariableType(variable, typeCode);
        // TODO:
        // we could also calculate the byte offset table now, rather
        // than figure it out later... - ?
        variables.add(variable);
    }

    reader.readClosingTag(TAG_VARIABLE_TYPES);
    dataTable.setDataVariables(variables);
}
use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
the class DTAFileReader method parseValueLabelsReleasel108.
/**
 * Reads the value-label tables from the stream and turns them into
 * VariableCategory objects attached to the variables of the data table.
 *
 * The method works in two passes. First, up to one value-label table per
 * variable is read from the stream and stored in a temporary lookup map,
 * keyed by the table's label name. Second, every variable that has an entry
 * in valueLabelsLookupTable is linked to the categories of the value-label
 * group of that name.
 *
 * Unlike a bare InputStream.read() call, the header and the table body are
 * read in full: an end of stream or a short read in the middle of a table is
 * reported as an IOException instead of silently parsing a partially filled
 * (zero-padded) buffer.
 *
 * @param stream the input stream, positioned at the first value-label table
 * @throws IllegalArgumentException if stream is null
 * @throws IOException if a header or table cannot be read completely, or if
 *         a table declares a non-positive length
 */
private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOException {
    dbgLog.fine("parseValueLabelsRelease108(): start");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }
    int nvar = dataTable.getVarQuantity().intValue();
    int length_label_name = constantTable.get("NAME");
    int length_value_label_header = value_label_table_length + length_label_name + VALUE_LABEL_HEADER_PADDING_LENGTH;
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("value_label_table_length=" + value_label_table_length);
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("length_value_label_header=" + length_value_label_header);
    }
    /*
    Seg field byte type
    1-1. len_vlt(Seg.2) 4 int
    1-2. vlt_name 9/33 char+(\0) == name used in Sec2.part 5
    1-3. padding 3 byte
    -----------------------------------
    16/40
    2-1. n(# of vls) 4 int
    2-2. m(len_labels) 4 int
    2-3. label_offsets 4*n int[]
    2-4. values 4*n int[]
    2-5. labels m char
    */
    // This map will hold a temporary lookup table for all the categorical
    // value-label groups:
    // These groups have unique names, and a group *may be shared* between
    // multiple variables. In the method decodeDescriptorValueLabel above
    // we have populated a lookup table where variables are linked to the
    // corresponding value-label groups by name. Thus we must fully populate
    // the full map of all the variable groups, then go through the list
    // of variables and create the dataverse variable categories from
    // them. -- L.A. 4.0
    Map<String, Map<String, String>> tempValueLabelTable = new LinkedHashMap<>();
    for (int i = 0; i < nvar; i++) {
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("\n\n" + i + "th value-label table header");
        }
        // Part 1: reading the header of a value-label table if exists.
        // read() signals end of stream with -1 (never 0 for a positive
        // length) and may return fewer bytes than requested, so the
        // header is read in full by the helper.
        byte[] valueLabelHeader = readValueLabelBytes(stream, length_value_label_header, "value label header");
        // 1.1 length_value_label_table
        ByteBuffer bb_value_label_header = ByteBuffer.wrap(valueLabelHeader, 0, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_header.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_value_label_table = bb_value_label_header.getInt();
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("length of this value-label table=" + length_value_label_table);
        }
        // 1.2 labelName
        String rawLabelName = new String(Arrays.copyOfRange(valueLabelHeader, value_label_table_length, (value_label_table_length + length_label_name)), "ISO-8859-1");
        String labelName = getNullStrippedString(rawLabelName);
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("label name = " + labelName + "\n");
        }
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine(i + "-th value-label table");
        }
        // Part 2: reading the value-label table (again, in full; a
        // non-positive declared length is rejected by the helper).
        byte[] valueLabelTable_i = readValueLabelBytes(stream, length_value_label_table, "value label table");
        // 2-1. 4-byte-integer: number of units in this table (n)
        int valueLabelTable_offset = 0;
        ByteBuffer bb_value_label_pairs = ByteBuffer.wrap(valueLabelTable_i, valueLabelTable_offset, value_label_table_length);
        if (isLittleEndian) {
            bb_value_label_pairs.order(ByteOrder.LITTLE_ENDIAN);
        }
        int no_value_label_pairs = bb_value_label_pairs.getInt();
        valueLabelTable_offset += value_label_table_length;
        if (dbgLog.isLoggable(Level.FINE)) {
            dbgLog.fine("no_value_label_pairs=" + no_value_label_pairs);
        }
        // 2-2. 4-byte-integer: length of the label section (m bytes)
        ByteBuffer bb_length_label_segment = ByteBuffer.wrap(valueLabelTable_i, valueLabelTable_offset, value_label_table_length);
        if (isLittleEndian) {
            bb_length_label_segment.order(ByteOrder.LITTLE_ENDIAN);
        }
        int length_label_segment = bb_length_label_segment.getInt();
        valueLabelTable_offset += value_label_table_length;
        // 2-3. integer array (n entries): offset values for the label sec.
        // these "label offsets" actually appear to represent the byte
        // offsets of the label strings, as stored in the next section.
        // They are used further below (section 2-5) to cut the individual
        // labels out of the label segment. -- L.A.
        int[] label_offsets = new int[no_value_label_pairs];
        int byte_offset = valueLabelTable_offset;
        for (int j = 0; j < no_value_label_pairs; j++) {
            // note: 4-byte signed, not java's long
            ByteBuffer bb_label_offset = ByteBuffer.wrap(valueLabelTable_i, byte_offset, value_label_table_length);
            if (isLittleEndian) {
                bb_label_offset.order(ByteOrder.LITTLE_ENDIAN);
                dbgLog.fine("label offset: byte reversed");
            }
            label_offsets[j] = bb_label_offset.getInt();
            dbgLog.fine("label offset [" + j + "]: " + label_offsets[j]);
            byte_offset += value_label_table_length;
        }
        // 2-4. integer array (n entries): value array (sorted)
        dbgLog.fine("value array");
        int[] valueList = new int[no_value_label_pairs];
        int offset_value = byte_offset;
        for (int k = 0; k < no_value_label_pairs; k++) {
            ByteBuffer bb_value_list = ByteBuffer.wrap(valueLabelTable_i, offset_value, value_label_table_length);
            if (isLittleEndian) {
                bb_value_list.order(ByteOrder.LITTLE_ENDIAN);
            }
            valueList[k] = bb_value_list.getInt();
            offset_value += value_label_table_length;
        }
        // 2-5. m-byte chars that store label data (m units of labels)
        String label_segment = new String(Arrays.copyOfRange(valueLabelTable_i, offset_value, (length_label_segment + offset_value)), "ISO-8859-1");
        // L.A. -- 2011.2.25:
        // Splitting the segment on '\0' would assume that the labels are
        // stored in the same order as the values. Instead, the offset
        // values obtained in the section 2-3 above are used to select
        // the corresponding null-terminated substrings:
        String[] labelList = new String[no_value_label_pairs];
        for (int l = 0; l < no_value_label_pairs; l++) {
            String lblString = label_segment.substring(label_offsets[l]);
            int nullIndx = lblString.indexOf('\000');
            if (nullIndx > -1) {
                lblString = lblString.substring(0, nullIndx);
            }
            labelList[l] = lblString;
        }
        // this should work! -- L.A.
        // (TODO: check the v105 value label parsing method, to see if
        // something similar applies there)
        // Finally, we've reached the actual value-label pairs. We'll go
        // through them and put them on the temporary lookup map:
        Map<String, String> valueLabelPairs = new LinkedHashMap<>();
        tempValueLabelTable.put(labelName, valueLabelPairs);
        for (int l = 0; l < no_value_label_pairs; l++) {
            if (dbgLog.isLoggable(Level.FINE)) {
                dbgLog.fine(l + "-th pair:" + valueList[l] + "[" + labelList[l] + "]");
            }
            // TODO: do we need any null/empty string checks here? -- L.A. 4.0
            valueLabelPairs.put(Integer.toString(valueList[l]), labelList[l]);
        }
        if (stream.available() == 0) {
            // reached the end of the file
            dbgLog.fine("reached the end of the file at " + i + "th value-label Table");
            break;
        }
    }
    // Second pass: link every variable to the categories of the value-label
    // group it references by name. -- L.A. 4.0
    for (int i = 0; i < nvar; i++) {
        if (valueLabelsLookupTable[i] == null) {
            continue;
        }
        Map<String, String> valueLabelPairs = tempValueLabelTable.get(valueLabelsLookupTable[i]);
        if (valueLabelPairs == null) {
            continue;
        }
        for (Map.Entry<String, String> pair : valueLabelPairs.entrySet()) {
            VariableCategory cat = new VariableCategory();
            cat.setValue(pair.getKey());
            cat.setLabel(pair.getValue());
            /* cross-link the variable and category to each other: */
            cat.setDataVariable(dataTable.getDataVariables().get(i));
            dataTable.getDataVariables().get(i).getCategories().add(cat);
        }
    }
    dbgLog.fine("parseValueLabelsRelease108(): end");
}

/**
 * Reads exactly {@code length} bytes from the stream, looping over short reads.
 *
 * @param stream the stream to read from
 * @param length the number of bytes required; must be positive
 * @param what   short description of the item being read, used in error messages
 * @return a newly allocated array holding the {@code length} bytes read
 * @throws IOException if {@code length} is not positive, if nothing could be
 *         read, or if the stream ends before {@code length} bytes were read
 */
private byte[] readValueLabelBytes(BufferedInputStream stream, int length, String what) throws IOException {
    if (length <= 0) {
        throw new IOException("reading " + what + ": no datum");
    }
    byte[] buffer = new byte[length];
    int total = 0;
    while (total < length) {
        int count = stream.read(buffer, total, length - total);
        if (count < 0) {
            // end of stream
            break;
        }
        total += count;
    }
    if (total == 0) {
        throw new IOException("reading " + what + ": no datum");
    }
    if (total < length) {
        throw new IOException("reading " + what + ": unexpected end of stream (expected " + length + " bytes, got " + total + ")");
    }
    return buffer;
}
use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
the class PORFileReader method assignValueLabels.
/**
 * Creates a VariableCategory for every value/label pair that applies to a
 * variable of the data table and cross-links the two.
 *
 * For each variable, its name is looked up in valueVariableMappingTable and
 * the result is used as the key into the supplied table. Variables with no
 * (or an empty) set of pairs are skipped.
 *
 * @param valueLabelTable value/label pairs, keyed by the entries of
 *        valueVariableMappingTable
 */
void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
    final int variableCount = dataTable.getVarQuantity().intValue();
    for (int index = 0; index < variableCount; index++) {
        String variableName = dataTable.getDataVariables().get(index).getName();
        Map<String, String> pairs = valueLabelTable.get(valueVariableMappingTable.get(variableName));
        if (pairs == null || pairs.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, String> pair : pairs.entrySet()) {
            VariableCategory category = new VariableCategory();
            category.setValue(pair.getKey());
            category.setLabel(pair.getValue());
            /* the category points at its variable, and the variable lists the category: */
            category.setDataVariable(dataTable.getDataVariables().get(index));
            dataTable.getDataVariables().get(index).getCategories().add(category);
        }
    }
}
use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
the class SAVFileReader method assignValueLabels.
/**
 * Walks the variables of the data table and, for each one that has
 * value/label pairs, adds one VariableCategory per pair.
 *
 * The pairs for a variable are found by mapping the variable name through
 * valueVariableMappingTable and looking the result up in the supplied table.
 * A missing or empty set of pairs leaves the variable untouched.
 *
 * @param valueLabelTable the value/label pairs, keyed by the entries of
 *        valueVariableMappingTable
 */
void assignValueLabels(Map<String, Map<String, String>> valueLabelTable) {
    final int total = dataTable.getVarQuantity().intValue();
    for (int position = 0; position < total; position++) {
        String name = dataTable.getDataVariables().get(position).getName();
        Map<String, String> labelsByValue = valueLabelTable.get(valueVariableMappingTable.get(name));
        boolean hasLabels = labelsByValue != null && !labelsByValue.isEmpty();
        if (!hasLabels) {
            continue;
        }
        for (Map.Entry<String, String> entry : labelsByValue.entrySet()) {
            VariableCategory category = new VariableCategory();
            category.setValue(entry.getKey());
            category.setLabel(entry.getValue());
            /* link both directions: category -> variable and variable -> category */
            category.setDataVariable(dataTable.getDataVariables().get(position));
            dataTable.getDataVariables().get(position).getCategories().add(category);
        }
    }
}
Aggregations