Use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by Alibaba.
The class MTable, method readCsvFromFile.
/**
 * Reads CSV-formatted lines from the given reader and assembles them into an MTable.
 *
 * @param reader    source of CSV lines; this method does not close it.
 * @param schemaStr schema string from which the field types are derived.
 * @return an MTable containing one Row per successfully parsed line.
 * @throws IOException      if reading from the reader fails.
 * @throws RuntimeException if any line cannot be parsed against the schema.
 */
public static MTable readCsvFromFile(BufferedReader reader, String schemaStr) throws IOException {
    TableSchema schema = CsvUtil.schemaStr2Schema(schemaStr);
    CsvParser parser = new CsvParser(schema.getFieldTypes(), ",", '"');
    List<Row> content = new ArrayList<>();
    String line;
    // readLine() returning null is the end-of-stream signal.
    while ((line = reader.readLine()) != null) {
        Tuple2<Boolean, Row> parsed = parser.parse(line);
        if (!parsed.f0) {
            throw new RuntimeException("Fail to parse line: \"" + line + "\"");
        }
        content.add(parsed.f1);
    }
    return new MTable(content, schema);
}
Use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by Alibaba.
The class LocalPredictorLoader, method loadLocalPredictor.
/**
 * Create a local predictor by loading model from a CSV file, without launching Flink job.
 *
 * @param modelPath   Path of the model file in CSV format. Can be local file or remote file.
 * @param inputSchema Test data schema.
 * @return a LocalPredictor chaining all pipeline stages, or null if the pipeline has no stages.
 * @throws Exception if the model file cannot be read, or its content cannot be parsed.
 */
@Deprecated
public static LocalPredictor loadLocalPredictor(String modelPath, TableSchema inputSchema) throws Exception {
    Map<Long, List<Row>> rows = readPipelineModelRowsFromCsvFile(modelPath, LegacyModelExporterUtils.PIPELINE_MODEL_SCHEMA);
    Preconditions.checkState(rows.containsKey(-1L), "can't find meta in model.");
    // Rows keyed by -1 carry the pipeline meta (a JSON string describing stages and their schemas).
    String meta = (String) rows.get(-1L).get(0).getField(1);
    PipelineStageBase[] transformers = constructPipelineStagesFromMeta(meta, 0L);
    String[] modelSchemaStr = JsonConverter.fromJson(JsonPath.read(meta, "$.schema").toString(), String[].class);
    LocalPredictor predictor = null;
    TableSchema schema = inputSchema;
    for (int i = 0; i < transformers.length; i++) {
        PipelineStageBase transformer = transformers[i];
        LocalPredictor localPredictor;
        if (transformer instanceof MapModel) {
            MapModel<?> mapModel = (MapModel<?>) transformer;
            ModelMapper mapper = mapModel.mapperBuilder.apply(CsvUtil.schemaStr2Schema(modelSchemaStr[i]), schema, mapModel.getParams());
            // Model rows are stored CSV-encoded in field 1, with '^' as separator and '\'' as quote.
            CsvParser csvParser = new CsvParser(CsvUtil.getColTypes(modelSchemaStr[i]), "^", '\'');
            List<Row> modelRows = rows.get((long) i);
            int s = modelRows.size();
            for (int j = 0; j < s; j++) {
                Row r = modelRows.get(j);
                Tuple2<Boolean, Row> parsed = csvParser.parse((String) r.getField(1));
                // Fail fast on malformed model rows instead of silently loading a corrupted model.
                Preconditions.checkState(parsed.f0, "Fail to parse line: " + r.getField(1));
                modelRows.set(j, parsed.f1);
            }
            mapper.loadModel(modelRows);
            localPredictor = new LocalPredictor(mapper);
        } else {
            // Non-MapModel stages provide their own local-prediction implementation.
            localPredictor = ((LocalPredictable) transformer).collectLocalPredictor(schema);
        }
        // Each stage's output schema feeds the next stage.
        schema = localPredictor.getOutputSchema();
        if (predictor == null) {
            predictor = localPredictor;
        } else {
            predictor.merge(localPredictor);
        }
    }
    return predictor;
}
Use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by Alibaba.
The class LocalPredictorLoader, method loadLocalPredictor.
/**
 * Create a local predictor by loading model from a {@code List<Row>}, without launching Flink job.
 *
 * @param modelRows  model in {@code List<Row>} format.
 * @param dataSchema Test data schema.
 * @return a LocalPredictor chaining all pipeline stages, or null if the pipeline has no stages.
 * @throws Exception if the model rows cannot be parsed.
 */
@Deprecated
public static LocalPredictor loadLocalPredictor(List<Row> modelRows, TableSchema dataSchema) throws Exception {
    Map<Long, List<Row>> mapRows = getMapModelRows(modelRows);
    // Rows keyed by -1 carry the pipeline meta (a JSON string describing stages and their schemas).
    String meta = (String) mapRows.get(-1L).get(0).getField(1);
    PipelineStageBase[] transformers = constructPipelineStagesFromMeta(meta, 0L);
    String[] modelSchemaStr = JsonConverter.fromJson(JsonPath.read(meta, "$.schema").toString(), String[].class);
    LocalPredictor predictor = null;
    TableSchema schema = dataSchema;
    for (int i = 0; i < transformers.length; i++) {
        PipelineStageBase transformer = transformers[i];
        LocalPredictor localPredictor;
        if (transformer instanceof MapModel) {
            MapModel<?> mapModel = (MapModel<?>) transformer;
            ModelMapper mapper = mapModel.mapperBuilder.apply(CsvUtil.schemaStr2Schema(modelSchemaStr[i]), schema, mapModel.getParams());
            // Model rows are stored CSV-encoded in field 1, with '^' as separator and '\'' as quote.
            CsvParser csvParser = new CsvParser(CsvUtil.getColTypes(modelSchemaStr[i]), "^", '\'');
            List<Row> singleModelRows = mapRows.get((long) i);
            int s = singleModelRows.size();
            for (int j = 0; j < s; j++) {
                Row r = singleModelRows.get(j);
                Tuple2<Boolean, Row> parsed = csvParser.parse((String) r.getField(1));
                // Fail fast on malformed model rows instead of silently loading a corrupted model.
                Preconditions.checkState(parsed.f0, "Fail to parse line: " + r.getField(1));
                singleModelRows.set(j, parsed.f1);
            }
            mapper.loadModel(singleModelRows);
            localPredictor = new LocalPredictor(mapper);
        } else {
            // Non-MapModel stages provide their own local-prediction implementation.
            localPredictor = ((LocalPredictable) transformer).collectLocalPredictor(schema);
        }
        // Each stage's output schema feeds the next stage.
        schema = localPredictor.getOutputSchema();
        if (predictor == null) {
            predictor = localPredictor;
        } else {
            predictor.merge(localPredictor);
        }
    }
    return predictor;
}
Use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by Alibaba.
The class LegacyModelExporterUtils, method unpackBatchOp.
/**
 * Unpack a BatchOperator whose rows were packed as CSV strings.
 *
 * <p>Each input row holds a CSV-encoded payload in field 1 (separator '^', quote '\'');
 * this method parses that payload back into a typed Row matching {@code schema}.
 * NOTE(review): the parse success flag (f0) is ignored here, so a malformed payload
 * yields whatever partial Row the parser produced — confirm whether fail-fast is wanted.
 *
 * @param data   operator whose rows carry the CSV payload in field 1.
 * @param schema target schema; its field types drive the CSV parsing.
 * @return a BatchOperator over the decoded rows, bound to the same ML environment.
 */
@Deprecated
private static BatchOperator<?> unpackBatchOp(BatchOperator<?> data, TableSchema schema) {
DataSet<Row> rows = data.getDataSet();
final TypeInformation<?>[] types = schema.getFieldTypes();
rows = rows.map(new RichMapFunction<Row, Row>() {
private static final long serialVersionUID = 7791442624358724472L;
// Built in open() because CsvParser is not serializable state of this function.
private transient CsvParser parser;
@Override
public void open(Configuration parameters) throws Exception {
parser = new CsvParser(types, "^", '\'');
}
@Override
public Row map(Row value) throws Exception {
return parser.parse((String) value.getField(1)).f1;
}
});
return BatchOperator.fromTable(DataSetConversionUtil.toTable(data.getMLEnvironmentId(), rows, schema)).setMLEnvironmentId(data.getMLEnvironmentId());
}
Use of com.alibaba.alink.operator.common.io.csv.CsvParser in project Alink by Alibaba.
The class LocalPredictorLoader, method readPipelineModelRowsFromCsvFile.
/**
 * Reads pipeline-model rows from a CSV file and groups them by the stage id in field 0.
 *
 * @param filePath path of the model file; local or remote, resolved via Flink's FileSystem.
 * @param schema   schema whose field types drive the CSV parsing.
 * @return map from stage id (field 0) to the rows belonging to that stage.
 * @throws Exception if the file cannot be opened, read, or a line fails to parse.
 */
@Deprecated
static Map<Long, List<Row>> readPipelineModelRowsFromCsvFile(String filePath, TableSchema schema) throws Exception {
    Map<Long, List<Row>> rows = new HashMap<>();
    Path path = new Path(filePath);
    FileSystem fs = FileSystem.get(path.toUri());
    // try-with-resources: the stream and reader are closed even if parsing throws.
    try (FSDataInputStream stream = fs.open(path);
         BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        CsvParser csvParser = new CsvParser(schema.getFieldTypes(), ",", '"');
        String line;
        // readLine() == null is the reliable EOF signal; BufferedReader.ready() only
        // reports whether a read would block and can misdetect end-of-input.
        while ((line = reader.readLine()) != null) {
            Tuple2<Boolean, Row> parsed = csvParser.parse(line);
            Preconditions.checkState(parsed.f0, "Fail to parse line: " + line);
            Long id = (Long) parsed.f1.getField(0);
            rows.computeIfAbsent(id, k -> new ArrayList<>()).add(parsed.f1);
        }
    }
    return rows;
}
Aggregations