Use of org.talend.dataprep.api.dataset.row.LightweightExportableDataSet in project data-prep by Talend.
From the class LookupTest, method cacheUsStates.
/**
 * Builds a five-row US states lookup data set (columns "Postal", "State" and "Capital")
 * and registers it in {@link LookupDatasetsManager} under the name {@code "us_states"}.
 */
private void cacheUsStates() {
    LightweightExportableDataSet usStates = new LightweightExportableDataSet();

    ColumnMetadata[] columnArrays = {
            ColumnMetadata.Builder.column().name("Postal").domain("US_STATE").type(Type.STRING).build(),
            ColumnMetadata.Builder.column().name("State").domain("US_STATE").type(Type.STRING).build(),
            ColumnMetadata.Builder.column().name("Capital").domain("CITY").type(Type.STRING).build() };
    // Collected into a list through the stream collector, exactly as before, so RowMetadata
    // receives the same kind of list it always did.
    List<ColumnMetadata> columns = Arrays.stream(columnArrays).collect(Collectors.toList());
    usStates.setMetadata(new RowMetadata(columns));

    // A typed list replaces the former raw "new HashMap[5]" array: no unchecked generic array
    // creation, no hard-coded size to keep in sync with the number of rows.
    List<Map<String, String>> rows = Arrays.asList(
            getValuesMap("GA", "Georgia", "Atlanta"),
            getValuesMap("FL", "Florida", "Tallahassee"),
            getValuesMap("IL", "Illinois", "Springfield"),
            getValuesMap("TX", "Texas", "Austin"),
            getValuesMap("CA", "California", "Sacramento"));

    // Each record is keyed by the value stored under "0000".
    // NOTE(review): presumably getValuesMap stores the first value (the postal code) under
    // "0000" - confirm against the helper.
    for (Map<String, String> row : rows) {
        usStates.addRecord(row.get("0000"), row);
    }

    LookupDatasetsManager.put("us_states", usStates);
}
Use of org.talend.dataprep.api.dataset.row.LightweightExportableDataSet in project data-prep by Talend.
From the class DataSetDataReader, method parseAndMapLookupDataSet.
/**
 * Parses a JSON data set from the given stream into a {@link LightweightExportableDataSet}.
 * <p>
 * The document must start with a JSON object. Its {@code metadata} field (an object) is turned
 * into the row metadata of the result, and its {@code records} field (an array) is handed to
 * {@code parseRecords} together with the row metadata known at that point and the join column.
 * Field names are matched case-insensitively.
 * </p>
 * <p>
 * NOTE(review): the JSON parser is closed when this method exits (try-with-resources). Whether
 * the supplied {@link InputStream} stays open therefore depends on the parser factory's
 * auto-close-source setting - confirm before relying on the stream afterwards.
 * </p>
 *
 * @param inputStream the input stream containing the data set, must not be {@code null}
 * @param joinOnColumn the column used to join the lookup data set
 * @return the lookup data set holding the parsed metadata and records
 * @throws IOException on a JSON reading error, or when the resulting data set is empty
 * @throws IllegalArgumentException if the stream is {@code null} or the JSON does not have the
 * expected structure
 */
public LightweightExportableDataSet parseAndMapLookupDataSet(InputStream inputStream, String joinOnColumn) throws IOException {
    Validate.isTrue(inputStream != null, "The provided input stream must not be null");

    try (JsonParser parser = mapper
            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
            .getFactory()
            .createParser(new InputStreamReader(inputStream, UTF_8))) {

        final LightweightExportableDataSet result = new LightweightExportableDataSet();
        // Used by parseRecords if "records" is encountered before any "metadata" field.
        RowMetadata metadata = new RowMetadata();

        JsonToken token = parser.nextToken();
        Validate.isTrue(token == JsonToken.START_OBJECT, INCORRECT_OBJECT_STRUCTURE_ERROR_MESSAGE);

        while (token != JsonToken.END_OBJECT && !parser.isClosed()) {
            token = parser.nextToken();
            final String fieldName = parser.getCurrentName();

            if ("metadata".equalsIgnoreCase(fieldName)) {
                // The value of "metadata" must be an object.
                final JsonToken valueStart = parser.nextToken();
                Validate.isTrue(valueStart == JsonToken.START_OBJECT, INCORRECT_OBJECT_STRUCTURE_ERROR_MESSAGE);
                metadata = parseDataSetMetadataAndReturnRowMetadata(parser);
                result.setMetadata(metadata);
                continue;
            }

            if ("records".equalsIgnoreCase(fieldName)) {
                // The value of "records" must be an array.
                final JsonToken valueStart = parser.nextToken();
                Validate.isTrue(valueStart == JsonToken.START_ARRAY, INCORRECT_OBJECT_STRUCTURE_ERROR_MESSAGE);
                result.setRecords(parseRecords(parser, metadata, joinOnColumn));
            }
        }

        if (result.isEmpty()) {
            throw new IOException("No lookup data has been retrieved when trying to parse the specified data set.");
        }
        return result;
    }
}
Use of org.talend.dataprep.api.dataset.row.LightweightExportableDataSet in project data-prep by Talend.
From the class LookupTest, method cacheNBA.
/**
 * Builds a one-record NBA lookup data set (the Dallas Mavericks row, keyed by {@code "TX"})
 * and registers it in {@link LookupDatasetsManager} under the name {@code "nba"}.
 */
private void cacheNBA() {
    final Map<String, String> mavericks = getValuesMap(
            "Southwest",
            "Dallas Mavericks",
            "Dallas",
            "TX",
            "American Airlines Center",
            "",
            "32.790556°N 96.810278°W");

    // The row is only created to obtain metadata matching the value map.
    final RowMetadata nbaMetadata = new DataSetRow(mavericks).getRowMetadata();

    // Give explicit names to the columns at index 1, 4 and 6; the others keep the names
    // they received when the row was created.
    final List<ColumnMetadata> nbaColumns = nbaMetadata.getColumns();
    nbaColumns.get(1).setName("Team");
    nbaColumns.get(4).setName("Stadium");
    nbaColumns.get(6).setName("Coordinates");

    final LightweightExportableDataSet nba = new LightweightExportableDataSet();
    nba.setMetadata(nbaMetadata);
    nba.addRecord("TX", mavericks);

    LookupDatasetsManager.put("nba", nba);
}
Aggregations