use of org.apache.commons.io.LineIterator in project deeplearning4j by deeplearning4j.
the class Word2VecDataFetcher method next.
@Override
public DataSet next() {
    // Serve from the cache when it already holds a full batch, or when no files remain.
    if (cache.size() >= batch || !files.hasNext())
        return fromCache();
    File f = files.next();
    try {
        LineIterator lines = FileUtils.lineIterator(f);
        INDArray outcomes = null;
        INDArray input = null;
        while (lines.hasNext()) {
            List<Window> windows = Windows.windows(lines.nextLine());
            if (windows.isEmpty() && lines.hasNext())
                continue;
            if (windows.size() < batch) {
                input = Nd4j.create(windows.size(), vec.lookupTable().layerSize() * vec.getWindow());
                outcomes = Nd4j.create(batch, labels.size());
                for (int i = 0; i < windows.size(); i++) {
                    input.putRow(i, WindowConverter.asExampleMatrix(cache.get(i), vec));
                    int idx = labels.indexOf(windows.get(i).getLabel());
                    if (idx < 0)
                        idx = 0;
                    INDArray outcomeRow = FeatureUtil.toOutcomeVector(idx, labels.size());
                    outcomes.putRow(i, outcomeRow);
                }
                return new DataSet(input, outcomes);
            } else {
                input = Nd4j.create(batch, vec.lookupTable().layerSize() * vec.getWindow());
                outcomes = Nd4j.create(batch, labels.size());
                for (int i = 0; i < batch; i++) {
                    input.putRow(i, WindowConverter.asExampleMatrix(cache.get(i), vec));
                    int idx = labels.indexOf(windows.get(i).getLabel());
                    if (idx < 0)
                        idx = 0;
                    INDArray outcomeRow = FeatureUtil.toOutcomeVector(idx, labels.size());
                    outcomes.putRow(i, outcomeRow);
                }
                /*
                 * Note that I'm aware of possible concerns for sentence sequencing.
                 * This is a hack right now in place of something
                 * that will be way more elegant in the future.
                 */
                if (windows.size() > batch) {
                    List<Window> leftOvers = windows.subList(batch, windows.size());
                    cache.addAll(leftOvers);
                }
                return new DataSet(input, outcomes);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return null;
}
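The fetcher above never closes the LineIterator it gets from FileUtils.lineIterator, so the underlying reader can leak if iteration ends early. A minimal sketch of the close-safe pattern the Commons IO javadoc recommends; processLine is a hypothetical stand-in for the per-line windowing work done above:

LineIterator lines = FileUtils.lineIterator(f, "UTF-8");
try {
    while (lines.hasNext()) {
        String line = lines.nextLine();
        processLine(line); // hypothetical stand-in for the per-line work
    }
} finally {
    LineIterator.closeQuietly(lines); // or lines.close()
}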
use of org.apache.commons.io.LineIterator in project druid by druid-io.
the class AggregationTestHelper method createIndex.
public void createIndex(InputStream inputDataStream, String parserJson, String aggregators, File outDir, long minTimestamp, Granularity gran, int maxRowCount) throws Exception {
    try {
        StringInputRowParser parser = mapper.readValue(parserJson, StringInputRowParser.class);
        LineIterator iter = IOUtils.lineIterator(inputDataStream, "UTF-8");
        List<AggregatorFactory> aggregatorSpecs = mapper.readValue(aggregators, new TypeReference<List<AggregatorFactory>>() {
        });
        createIndex(iter, parser, aggregatorSpecs.toArray(new AggregatorFactory[0]), outDir, minTimestamp, gran, true, maxRowCount);
    } finally {
        Closeables.close(inputDataStream, true);
    }
}
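Note the ownership split this snippet relies on: IOUtils.lineIterator wraps a caller-supplied stream without taking ownership of it, which is why the finally block still closes inputDataStream itself (here via Guava's Closeables.close). A minimal, self-contained sketch of that stream-based variant; the in-memory data is made up:

InputStream in = new ByteArrayInputStream("row1\nrow2\n".getBytes(StandardCharsets.UTF_8)); // made-up data
try {
    LineIterator iter = IOUtils.lineIterator(in, "UTF-8");
    while (iter.hasNext()) {
        System.out.println(iter.nextLine());
    }
} finally {
    in.close(); // the iterator does not close the stream for us
}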
use of org.apache.commons.io.LineIterator in project deeplearning4j by deeplearning4j.
the class WordVectorSerializer method loadTxt.
/**
 * Loads an in-memory cache from the given vectors file (sets syn0 and the vocab).
 *
 * @param vectorsFile the file to load the vectors from
 * @return a Pair holding the lookup table and the vocab cache
 * @throws FileNotFoundException if the input file does not exist
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException, UnsupportedEncodingException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(vectorsFile), "UTF-8"));
    AbstractCache cache = new AbstractCache<>();
    LineIterator iter = IOUtils.lineIterator(reader);
    String line = null;
    boolean hasHeader = false;
    if (iter.hasNext()) {
        // Read the first line and decide whether it is a header to skip.
        line = iter.nextLine();
        // A line without spaces cannot be a word vector, so treat it as a header.
        if (!line.contains(" ")) {
            log.debug("Skipping first line");
            hasHeader = true;
        } else {
            // Check for something that looks like a proper word vector, i.e. a word
            // at position 0 followed by a series of floats. If every token parses
            // as a long, the line is a header instead.
            String[] split = line.split(" ");
            try {
                long[] header = new long[split.length];
                for (int x = 0; x < split.length; x++) {
                    header[x] = Long.parseLong(split[x]);
                }
                if (split.length < 4)
                    hasHeader = true;
                // header[2] - number of documents <-- DL4J-only value
                if (split.length == 3)
                    cache.incrementTotalDocCount(header[2]);
                printOutProjectedMemoryUse(header[0], (int) header[1], 1);
                hasHeader = true;
                try {
                    reader.close();
                } catch (Exception ex) {
                    // ignore
                }
            } catch (Exception e) {
                // Any conversion exception means the line is data, not a header.
                hasHeader = false;
            }
        }
    }
    // Reposition the buffer to be one line ahead.
    if (hasHeader) {
        line = "";
        iter.close();
        reader = new BufferedReader(new FileReader(vectorsFile));
        iter = IOUtils.lineIterator(reader);
        iter.nextLine();
    }
    List<INDArray> arrays = new ArrayList<>();
    while (iter.hasNext()) {
        if (line.isEmpty())
            line = iter.nextLine();
        String[] split = line.split(" ");
        String word = decodeB64(split[0]);
        VocabWord word1 = new VocabWord(1.0, word);
        word1.setIndex(cache.numWords());
        cache.addToken(word1);
        cache.addWordToIndex(word1.getIndex(), word);
        cache.putVocabWord(word);
        float[] vector = new float[split.length - 1];
        for (int i = 1; i < split.length; i++) {
            vector[i - 1] = Float.parseFloat(split[i]);
        }
        INDArray row = Nd4j.create(vector);
        arrays.add(row);
        // Workaround for the already-consumed first row.
        line = "";
    }
    INDArray syn = Nd4j.vstack(arrays);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder().vectorLength(arrays.get(0).columns()).useAdaGrad(false).cache(cache).useHierarchicSoftmax(false).build();
    if (Nd4j.ENFORCE_NUMERICAL_STABILITY)
        Nd4j.clearNans(syn);
    lookupTable.setSyn0(syn);
    iter.close();
    try {
        reader.close();
    } catch (Exception e) {
        // ignore
    }
    return new Pair<>(lookupTable, (VocabCache) cache);
}
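A hedged usage sketch for loadTxt; the path is made up, and the Pair accessors (getFirst/getSecond) plus the vector lookup are assumptions about the surrounding DL4J API rather than anything shown above:

// Hypothetical usage; "/tmp/vectors.txt" is a made-up path.
Pair<InMemoryLookupTable, VocabCache> loaded = WordVectorSerializer.loadTxt(new File("/tmp/vectors.txt"));
InMemoryLookupTable lookupTable = loaded.getFirst(); // assumed Pair accessor
VocabCache vocab = loaded.getSecond();               // assumed Pair accessor
// Look up the stored vector for the first word in the vocab (assumed API).
INDArray vector = lookupTable.vector(vocab.wordAtIndex(0));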
use of org.apache.commons.io.LineIterator in project deeplearning4j by deeplearning4j.
the class GloveWeightLookupTable method load.
/**
 * Loads a GloVe model from an input stream.
 * The format is:
 * word num1 num2 ...
 * @param is the input stream to read the weights from
 * @param vocab the vocab for the lookup table
 * @return the loaded model
 * @throws java.io.IOException if one occurs
 */
public static GloveWeightLookupTable load(InputStream is, VocabCache<? extends SequenceElement> vocab) throws IOException {
    LineIterator iter = IOUtils.lineIterator(is, "UTF-8");
    GloveWeightLookupTable glove = null;
    Map<String, float[]> wordVectors = new HashMap<>();
    while (iter.hasNext()) {
        String line = iter.nextLine().trim();
        if (line.isEmpty())
            continue;
        String[] split = line.split(" ");
        String word = split[0];
        if (glove == null)
            glove = new GloveWeightLookupTable.Builder().cache(vocab).vectorLength(split.length - 1).build();
        if (word.isEmpty())
            continue;
        float[] read = read(split, glove.layerSize());
        if (read.length < 1)
            continue;
        wordVectors.put(word, read);
    }
    glove.setSyn0(weights(glove, wordVectors, vocab));
    glove.resetWeights(false);
    iter.close();
    return glove;
}
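A hedged usage sketch for load; the path is made up, vocab is assumed to be a previously built VocabCache matching the GloVe file, and the vector lookup is an assumption about the lookup-table API:

try (InputStream is = new FileInputStream("/tmp/glove.txt")) { // made-up path
    GloveWeightLookupTable glove = GloveWeightLookupTable.load(is, vocab);
    INDArray v = glove.vector("day"); // assumes "day" exists in the model
}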
use of org.apache.commons.io.LineIterator in project opennms by OpenNMS.
the class JettyConfigMigratorOffline method execute.
/* (non-Javadoc)
 * @see org.opennms.upgrade.api.OnmsUpgrade#execute()
 */
@Override
public void execute() throws OnmsUpgradeException {
    String jettySSL = getMainProperties().getProperty("org.opennms.netmgt.jetty.https-port", null);
    String jettyAJP = getMainProperties().getProperty("org.opennms.netmgt.jetty.ajp-port", null);
    boolean sslWasFixed = false;
    boolean ajpWasFixed = false;
    try {
        log("SSL Enabled ? %s\n", jettySSL != null);
        log("AJP Enabled ? %s\n", jettyAJP != null);
        if (jettySSL != null || jettyAJP != null) {
            File jettyXmlExample = new File(getHomeDirectory(), "etc" + File.separator + "examples" + File.separator + "jetty.xml");
            File jettyXml = new File(getHomeDirectory(), "etc" + File.separator + "jetty.xml");
            if (!jettyXml.exists() && !jettyXmlExample.exists()) {
                throw new FileNotFoundException("The required file doesn't exist: " + jettyXmlExample);
            }
            if (!jettyXml.exists()) {
                log("Copying %s into %s\n", jettyXmlExample, jettyXml);
                FileUtils.copyFile(jettyXmlExample, jettyXml);
            }
            log("Creating %s\n", jettyXml);
            File tempFile = new File(jettyXml.getAbsoluteFile() + ".tmp");
            FileWriter w = new FileWriter(tempFile);
            LineIterator it = FileUtils.lineIterator(jettyXmlExample);
            boolean startSsl = false;
            boolean startAjp = false;
            while (it.hasNext()) {
                String line = it.next();
                if (startAjp) {
                    // Drop the comment markers that disable the AJP connector.
                    if (line.matches("^\\s+[<][!]--\\s*$")) {
                        continue;
                    }
                    if (line.matches("^\\s+--[>]\\s*$")) {
                        startAjp = false;
                        ajpWasFixed = true;
                        continue;
                    }
                }
                if (startSsl) {
                    // Drop the comment markers that disable the SSL connector.
                    if (line.matches("^\\s+[<][!]--\\s*$")) {
                        continue;
                    }
                    if (line.matches("^\\s+--[>]\\s*$")) {
                        startSsl = false;
                        sslWasFixed = true;
                        continue;
                    }
                }
                w.write(line + "\n");
                if (!startAjp && line.contains("<!-- Add AJP support -->") && jettyAJP != null) {
                    startAjp = true;
                    log("Enabling AjpConnector\n");
                }
                if (!startSsl && line.contains("<!-- Add HTTPS support -->") && jettySSL != null) {
                    startSsl = true;
                    log("Enabling SslSelectChannelConnector\n");
                }
            }
            LineIterator.closeQuietly(it);
            w.close();
            FileUtils.copyFile(tempFile, jettyXml);
            FileUtils.deleteQuietly(tempFile);
        } else {
            log("Neither SSL nor AJP is enabled.\n");
        }
    } catch (Exception e) {
        throw new OnmsUpgradeException("Can't fix Jetty configuration because " + e.getMessage(), e);
    }
    if (jettyAJP != null && !ajpWasFixed) {
        throw new OnmsUpgradeException("Can't enable AJP, please manually edit jetty.xml and uncomment the section where org.eclipse.jetty.ajp.Ajp13SocketConnector is defined.");
    }
    if (jettySSL != null && !sslWasFixed) {
        throw new OnmsUpgradeException("Can't enable SSL, please manually edit jetty.xml and uncomment the section where org.eclipse.jetty.server.ssl.SslSelectChannelConnector is defined.");
    }
}
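Stripped of the migrator-specific state machine, the core LineIterator idiom here is read-filter-write: stream the source file line by line, decide per line whether to keep it, and write the survivors to a temporary file. A minimal sketch under that reading; shouldSkip is a hypothetical predicate standing in for the comment-marker matching above:

File source = new File("jetty.xml");   // assumed input
File temp = new File("jetty.xml.tmp"); // assumed output
LineIterator it = FileUtils.lineIterator(source);
try (FileWriter w = new FileWriter(temp)) {
    while (it.hasNext()) {
        String line = it.next();
        if (!shouldSkip(line)) { // hypothetical predicate
            w.write(line + "\n");
        }
    }
} finally {
    LineIterator.closeQuietly(it);
}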