use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class TaggerExperiments method getFeaturesNew.
/**
 * Builds the list of populated features in {@code feats} and counts, in {@code fnumArr},
 * how many features are active for every (x, y) pair.
 *
 * <p>For each {@code FeatureKey} in {@code sTemplates} the x values returned by
 * {@code tFeature.getXValues} are (optionally) filtered by the tagger's
 * {@code occurringTagsOnly} / {@code possibleTagsOnly} settings, their {@code px} values are
 * summed as evidence, and the key is kept only if {@code populated(...)} accepts it. The x
 * values of kept features are spooled to a temporary file and read back into
 * {@code feats.xIndexed} once their total number is known.
 *
 * <p>This method uses and deletes a file tempXXXXXX.x in the current directory! The file is
 * closed and deleted even if building the features fails part-way.
 *
 * @throws RuntimeIOException wrapping any exception raised while building the features
 */
private void getFeaturesNew() {
  // todo: can fnumArr overflow? (it is a byte array that is incremented once per active feature)
  File hFile = null;
  try {
    log.info("TaggerExperiments.getFeaturesNew: initializing fnumArr.");
    // what is the maximum number of active features
    fnumArr = new byte[xSize][ySize];
    hFile = File.createTempFile("temp", ".x", new File("./"));
    // try-with-resources: the spool file handle must not leak when an exception is thrown below
    try (RandomAccessFile hF = new RandomAccessFile(hFile, "rw")) {
      log.info(" length of sTemplates keys: " + sTemplates.size());
      log.info("getFeaturesNew adding features ...");
      // number of ints written to the spool file so far
      int current = 0;
      int numFeats = 0;
      final boolean VERBOSE = false;
      for (FeatureKey fK : sTemplates) {
        int numF = fK.num;
        int[] xValues;
        Pair<Integer, String> wT = new Pair<>(numF, fK.val);
        xValues = tFeature.getXValues(wT);
        if (xValues == null) {
          // + " " + i
          log.info(" xValues is null: " + fK);
          continue;
        }
        // first pass over the x values: sum up the evidence for this feature
        int numEvidence = 0;
        int y = maxentTagger.tags.getIndex(fK.tag);
        for (int xValue : xValues) {
          if (maxentTagger.occurringTagsOnly) {
            // check whether the current word in x has occurred with y
            String word = ExtractorFrames.cWord.extract(tHistories.getHistory(xValue));
            if (maxentTagger.dict.getCount(word, fK.tag) == 0) {
              continue;
            }
          }
          if (maxentTagger.possibleTagsOnly) {
            String word = ExtractorFrames.cWord.extract(tHistories.getHistory(xValue));
            String[] tags = maxentTagger.dict.getTags(word);
            Set<String> s = Generics.newHashSet(Arrays.asList(maxentTagger.tags.deterministicallyExpandTags(tags)));
            if (DEBUG) {
              System.err.printf("possible tags for %s: %s\n", word, Arrays.toString(s.toArray()));
            }
            if (!s.contains(fK.tag)) {
              continue;
            }
          }
          numEvidence += this.px[xValue];
        }
        if (populated(numF, numEvidence)) {
          int[] positions = tFeature.getPositions(fK);
          if (maxentTagger.occurringTagsOnly || maxentTagger.possibleTagsOnly) {
            // TODO
            // with filtering on, stored positions are not reused; the x values are always rewritten
            positions = null;
          }
          if (positions == null) {
            // write this in the file and create a TaggerFeature for it
            int numElements = 0;
            for (int x : xValues) {
              if (maxentTagger.occurringTagsOnly) {
                // check whether the current word in x has occurred with y
                String word = ExtractorFrames.cWord.extract(tHistories.getHistory(x));
                if (maxentTagger.dict.getCount(word, fK.tag) == 0) {
                  continue;
                }
              }
              if (maxentTagger.possibleTagsOnly) {
                String word = ExtractorFrames.cWord.extract(tHistories.getHistory(x));
                String[] tags = maxentTagger.dict.getTags(word);
                Set<String> s = Generics.newHashSet(Arrays.asList(maxentTagger.tags.deterministicallyExpandTags(tags)));
                if (!s.contains(fK.tag)) {
                  continue;
                }
              }
              numElements++;
              hF.writeInt(x);
              fnumArr[x][y]++;
            }
            TaggerFeature tF = new TaggerFeature(current, current + numElements - 1, fK, maxentTagger.getTagIndex(fK.tag), this);
            tFeature.addPositions(current, current + numElements - 1, fK);
            current = current + numElements;
            feats.add(tF);
            if (VERBOSE) {
              log.info(" added feature with key " + fK + " has support " + numElements);
            }
          } else {
            for (int x : xValues) {
              fnumArr[x][y]++;
            }
            // this is the second time to write these values
            TaggerFeature tF = new TaggerFeature(positions[0], positions[1], fK, maxentTagger.getTagIndex(fK.tag), this);
            feats.add(tF);
            if (VERBOSE) {
              log.info(" added feature with key " + fK + " has support " + xValues.length);
            }
          }
          // look up the tag # in the index
          if (maxentTagger.fAssociations.size() <= fK.num) {
            // grow the association list so that index fK.num exists
            for (int i = maxentTagger.fAssociations.size(); i <= fK.num; ++i) {
              maxentTagger.fAssociations.add(Generics.newHashMap());
            }
          }
          Map<String, int[]> fValueAssociations = maxentTagger.fAssociations.get(fK.num);
          int[] fTagAssociations = fValueAssociations.get(fK.val);
          if (fTagAssociations == null) {
            // -1 marks "no feature for this tag"
            fTagAssociations = new int[ySize];
            for (int i = 0; i < ySize; ++i) {
              fTagAssociations[i] = -1;
            }
            fValueAssociations.put(fK.val, fTagAssociations);
          }
          fTagAssociations[maxentTagger.tags.getIndex(fK.tag)] = numFeats;
          numFeats++;
        }
      }
      // foreach FeatureKey fK
      // read out the file and put everything in an array of ints stored in Feats
      tFeature.release();
      feats.xIndexed = new int[current];
      hF.seek(0);
      int current1 = 0;
      while (current1 < current) {
        feats.xIndexed[current1] = hF.readInt();
        current1++;
      }
      log.info(" total feats: " + sTemplates.size() + ", populated: " + numFeats);
    }
    // what is the maximum number of active features per pair
    int max = 0;
    int maxGt = 0;
    // long: xSize * ySize may exceed the int range
    long numZeros = 0;
    for (int x = 0; x < xSize; x++) {
      int numGt = 0;
      for (int y = 0; y < ySize; y++) {
        if (fnumArr[x][y] > 0) {
          numGt++;
          if (max < fnumArr[x][y]) {
            max = fnumArr[x][y];
          }
        } else {
          // if 00
          numZeros++;
        }
      }
      if (maxGt < numGt) {
        maxGt = numGt;
      }
    }
    // for x
    log.info(" Max features per x,y pair: " + max);
    log.info(" Max non-zero y values for an x: " + maxGt);
    log.info(" Number of non-zero feature x,y pairs: " + ((long) xSize * ySize - numZeros));
    log.info(" Number of zero feature x,y pairs: " + numZeros);
    log.info("end getFeaturesNew.");
  } catch (Exception e) {
    throw new RuntimeIOException(e);
  } finally {
    // always remove the spool file, also when an exception is propagating
    if (hFile != null && !hFile.delete()) {
      log.info("getFeaturesNew: could not delete temporary file " + hFile);
    }
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class StringUtils method argsToProperties.
/**
 * Turns command line arguments into a {@link Properties} object. This is the Properties
 * counterpart of {@link #argsToMap}, with these differences:
 * <ul>
 * <li> Leading hyphens (one or two) are removed from flag names. </li>
 * <li> A flag that is not listed in {@code flagsToNumArgs} takes at most one argument
 *      (rather than zero), because Properties map Strings to Strings. </li>
 * <li> Arguments that are not bound to any flag are stored under the "" property, not null. </li>
 * <li> The special flags "-prop", "-props", "-properties", "-args", or "-arguments" load the
 *      properties file named by their argument. </li>
 * <li> A flag without arguments gets the value "true". </li>
 * <li> Several arguments of one flag are joined with a single space (" ") to form the value. </li>
 * <li> Values read from a properties file are trimmed, so trailing spaces do not stop you from
 *      loading a file. </li>
 * </ul>
 * Arguments are processed from left to right; a later flag overrides an earlier flag of the
 * same name. Properties that come from a properties file are only defaults: they never replace
 * a value given on the command line (or by an earlier properties file, if the special flags
 * are nested).
 *
 * @param args Command line arguments
 * @param flagsToNumArgs Map of how many arguments flags should have. The keys are without the minus signs.
 * @return A Properties object representing the arguments.
 */
public static Properties argsToProperties(String[] args, Map<String, Integer> flagsToNumArgs) {
  Properties props = new Properties();
  List<String> unboundArgs = new ArrayList<>();
  for (int i = 0; i < args.length; i++) {
    String arg = args[i];
    if (arg.isEmpty() || arg.charAt(0) != '-') {
      // not a flag: collect it for the "" property
      unboundArgs.add(arg);
      continue;
    }

    // found a flag: strip off either two hyphens or one
    String flag = arg.startsWith("--") ? arg.substring(2) : arg.substring(1);

    Integer declaredArgs = flagsToNumArgs.get(flag);
    int minArgs;
    int maxArgs;
    if (declaredArgs == null) {
      minArgs = 0;
      maxArgs = 1;
    } else {
      minArgs = declaredArgs;
      maxArgs = declaredArgs;
    }
    if (declaredArgs != null && declaredArgs == 0 && i < args.length - 1) {
      String following = args[i + 1];
      if ("true".equalsIgnoreCase(following) || "false".equalsIgnoreCase(following)) {
        // case: we're reading a boolean flag. TODO(gabor) there's gotta be a better way...
        maxArgs = 1;
      }
    }

    // Consume the arguments belonging to this flag. Once the minimum has been read, stop at
    // the next thing that looks like a flag.
    // cdm oct 2007: the emptiness check allows for an empty string argument!
    List<String> consumed = new ArrayList<>();
    int taken = 0;
    while (taken < maxArgs && i + 1 < args.length) {
      if (taken >= minArgs && !args[i + 1].isEmpty() && args[i + 1].charAt(0) == '-') {
        break;
      }
      consumed.add(args[i + 1]);
      i++;
      taken++;
    }

    String flagValue = consumed.isEmpty() ? "true" : join(consumed, " ");

    boolean loadsPropertiesFile = flag.equalsIgnoreCase(PROP)
        || flag.equalsIgnoreCase(PROPS)
        || flag.equalsIgnoreCase(PROPERTIES)
        || flag.equalsIgnoreCase(ARGUMENTS)
        || flag.equalsIgnoreCase(ARGS)
        || flag.equalsIgnoreCase(LANG);
    // all special flags are normalized to the single PROPERTIES key
    props.setProperty(loadsPropertiesFile ? PROPERTIES : flag, flagValue);
  }

  if (!unboundArgs.isEmpty()) {
    props.setProperty("", join(unboundArgs, " "));
  }

  /* Processing in reverse order, add properties that aren't present only. Thus, later ones override earlier ones. */
  while (props.containsKey(PROPERTIES)) {
    String source = props.getProperty(PROPERTIES);
    if (LanguageInfo.isStanfordCoreNLPSupportedLang(source)) {
      source = LanguageInfo.getLanguagePropertiesFile(source);
    }
    props.remove(PROPERTIES);

    Properties loaded = new Properties();
    BufferedReader in = null;
    try {
      in = IOUtils.readerFromString(source);
      loaded.load(in);
      // trim all values
      for (String name : loaded.stringPropertyNames()) {
        loaded.setProperty(name, loaded.getProperty(name).trim());
      }
    } catch (IOException e) {
      throw new RuntimeIOException("argsToProperties could not read properties file: " + source, e);
    } finally {
      IOUtils.closeIgnoringExceptions(in);
    }

    // file values are defaults only: keep whatever is already set
    for (String name : loaded.stringPropertyNames()) {
      if (props.containsKey(name)) {
        continue;
      }
      props.setProperty(name, loaded.getProperty(name));
    }
  }
  return props;
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class StringUtils method printToFile.
/**
 * Prints to a file. If the file already exists, appends if
 * {@code append=true}, and overwrites if {@code append=false}.
 *
 * @param file The file to write to
 * @param message The text to print ({@code null} is printed as "null", as by {@link PrintWriter#print(String)})
 * @param append Whether to append to an existing file instead of overwriting it
 * @throws RuntimeIOException if the file cannot be opened or the message cannot be written
 */
public static void printToFile(File file, String message, boolean append) {
  try (PrintWriter pw = new PrintWriter(new FileWriter(file, append))) {
    pw.print(message);
    // PrintWriter never throws on a failed write, it only records the failure;
    // checkError() flushes and reports it so that it is not silently lost.
    if (pw.checkError()) {
      throw new IOException("Error while writing message to file");
    }
  } catch (Exception e) {
    throw new RuntimeIOException("Exception in printToFile " + file.getAbsolutePath(), e);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class MetaClass method cast.
/**
 * Cast a String representation of an object into that object.
 * E.g. "5.4" will be cast to a Double; "[1,2,3]" will be cast
 * to an Integer[] (or an int[] if a primitive array type is requested).
 *
 * NOTE: Date parses from a Long
 *
 * <p>The target class is matched against a fixed sequence of known types (String, primitives
 * and their wrappers, big numbers, Lazy, Optional, dates, readers/writers/streams, File, Class,
 * arrays, Map, enums). For any other class a {@code fromString(String)} method is tried via
 * reflection, then Tree and Collection types are handled; if nothing applies, {@code null}
 * is returned.
 *
 * @param <E> The type of the object returned (same as type)
 * @param value The string representation of the object
 * @param type The type (usually class) to be returned (same as E)
 * @return An object corresponding to the String value passed, or {@code null} if the value
 *         cannot be interpreted for some types (unparseable Date/Calendar, unknown Class name,
 *         null array/enum value, or an unsupported target class)
 * @throws IllegalArgumentException if {@code type} is neither a Class nor a ParameterizedType,
 *         or if an enum value is empty or matches no constant
 * @throws RuntimeIOException if a FileWriter, BufferedReader or FileReader cannot be opened
 * @throws RuntimeException if one of the stream types or a Tree cannot be created or read
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static <E> E cast(String value, Type type) {
  // --Get Type
  Class<?> clazz;
  if (type instanceof Class) {
    clazz = (Class<?>) type;
  } else if (type instanceof ParameterizedType) {
    ParameterizedType pt = (ParameterizedType) type;
    clazz = (Class<?>) pt.getRawType();
  } else {
    throw new IllegalArgumentException("Cannot cast to type (unhandled type): " + type);
  }
  // --Cast
  if (String.class.isAssignableFrom(clazz)) {
    // (case: String)
    return (E) value;
  } else if (Boolean.class.isAssignableFrom(clazz) || boolean.class.isAssignableFrom(clazz)) {
    // (case: boolean)
    if ("1".equals(value)) {
      return (E) Boolean.TRUE;
    }
    return (E) Boolean.valueOf(Boolean.parseBoolean(value));
  } else if (Integer.class.isAssignableFrom(clazz) || int.class.isAssignableFrom(clazz)) {
    // (case: integer)
    try {
      return (E) Integer.valueOf(Integer.parseInt(value));
    } catch (NumberFormatException e) {
      // fall back to truncating a decimal representation such as "5.4"
      return (E) Integer.valueOf((int) Double.parseDouble(value));
    }
  } else if (BigInteger.class.isAssignableFrom(clazz)) {
    // (case: biginteger)
    if (value == null) {
      return (E) BigInteger.ZERO;
    }
    return (E) new BigInteger(value);
  } else if (Long.class.isAssignableFrom(clazz) || long.class.isAssignableFrom(clazz)) {
    // (case: long)
    try {
      return (E) Long.valueOf(Long.parseLong(value));
    } catch (NumberFormatException e) {
      return (E) Long.valueOf((long) Double.parseDouble(value));
    }
  } else if (Float.class.isAssignableFrom(clazz) || float.class.isAssignableFrom(clazz)) {
    // (case: float)
    if (value == null) {
      return (E) Float.valueOf(Float.NaN);
    }
    return (E) Float.valueOf(Float.parseFloat(value));
  } else if (Double.class.isAssignableFrom(clazz) || double.class.isAssignableFrom(clazz)) {
    // (case: double)
    if (value == null) {
      return (E) Double.valueOf(Double.NaN);
    }
    return (E) Double.valueOf(Double.parseDouble(value));
  } else if (BigDecimal.class.isAssignableFrom(clazz)) {
    // (case: bigdecimal)
    if (value == null) {
      return (E) BigDecimal.ZERO;
    }
    return (E) new BigDecimal(value);
  } else if (Short.class.isAssignableFrom(clazz) || short.class.isAssignableFrom(clazz)) {
    // (case: short)
    try {
      return (E) Short.valueOf(Short.parseShort(value));
    } catch (NumberFormatException e) {
      return (E) Short.valueOf((short) Double.parseDouble(value));
    }
  } else if (Byte.class.isAssignableFrom(clazz) || byte.class.isAssignableFrom(clazz)) {
    // (case: byte)
    try {
      return (E) Byte.valueOf(Byte.parseByte(value));
    } catch (NumberFormatException e) {
      return (E) Byte.valueOf((byte) Double.parseDouble(value));
    }
  } else if (Character.class.isAssignableFrom(clazz) || char.class.isAssignableFrom(clazz)) {
    // (case: char) the value is the numeric code of the character, not the character itself
    return (E) Character.valueOf((char) Integer.parseInt(value));
  } else if (Lazy.class.isAssignableFrom(clazz)) {
    // (case: Lazy)
    final String v = value;
    return (E) Lazy.of(() -> MetaClass.castWithoutKnowingType(v));
  } else if (Optional.class.isAssignableFrom(clazz)) {
    // (case: Optional) equalsIgnoreCase keeps the comparison independent of the default locale
    boolean isEmpty = value == null
        || "null".equalsIgnoreCase(value)
        || "empty".equalsIgnoreCase(value)
        || "none".equalsIgnoreCase(value);
    return (E) (isEmpty ? Optional.empty() : Optional.of(value));
  } else if (java.util.Date.class.isAssignableFrom(clazz)) {
    // (case: date)
    try {
      return (E) new Date(Long.parseLong(value));
    } catch (NumberFormatException e) {
      return null;
    }
  } else if (java.util.Calendar.class.isAssignableFrom(clazz)) {
    // (case: calendar)
    try {
      Date d = new Date(Long.parseLong(value));
      GregorianCalendar cal = new GregorianCalendar();
      cal.setTime(d);
      return (E) cal;
    } catch (NumberFormatException e) {
      return null;
    }
  } else if (FileWriter.class.isAssignableFrom(clazz)) {
    try {
      return (E) new FileWriter(new File(value));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (BufferedReader.class.isAssignableFrom(clazz)) {
    try {
      return (E) IOUtils.readerFromString(value);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (FileReader.class.isAssignableFrom(clazz)) {
    try {
      return (E) new FileReader(new File(value));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  } else if (File.class.isAssignableFrom(clazz)) {
    return (E) new File(value);
  } else if (Class.class.isAssignableFrom(clazz)) {
    try {
      return (E) Class.forName(value);
    } catch (ClassNotFoundException e) {
      return null;
    }
  } else if (clazz.isArray()) {
    if (value == null) {
      return null;
    }
    Class<?> subType = clazz.getComponentType();
    // (case: array)
    String[] strings = StringUtils.decodeArray(value);
    // Keep the array as a plain Object and fill it reflectively: an array of primitives
    // (e.g. int[]) is not an Object[] and Array.set unboxes the elements as needed.
    Object array = Array.newInstance(subType, strings.length);
    for (int i = 0; i < strings.length; i++) {
      Array.set(array, i, cast(strings[i], subType));
    }
    return (E) array;
  } else if (Map.class.isAssignableFrom(clazz)) {
    return (E) StringUtils.decodeMap(value);
  } else if (clazz.isEnum()) {
    // (case: enumeration)
    Class c = (Class) clazz;
    if (value == null) {
      return null;
    }
    // strip one optional surrounding double quote on each side; the emptiness checks
    // protect against "" and a lone quote character
    String name = value;
    if (!name.isEmpty() && name.charAt(0) == '"') {
      name = name.substring(1);
    }
    if (!name.isEmpty() && name.charAt(name.length() - 1) == '"') {
      name = name.substring(0, name.length() - 1);
    }
    if (name.isEmpty()) {
      throw new IllegalArgumentException("Cannot cast an empty value to enum type " + clazz.getName());
    }
    // try the name as given, then lower case, upper case, and finally with the case of the
    // first character flipped
    try {
      return (E) Enum.valueOf(c, name);
    } catch (Exception e) {
      try {
        return (E) Enum.valueOf(c, name.toLowerCase(Locale.ROOT));
      } catch (Exception e2) {
        try {
          return (E) Enum.valueOf(c, name.toUpperCase(Locale.ROOT));
        } catch (Exception e3) {
          return (E) Enum.valueOf(c, (Character.isUpperCase(name.charAt(0)) ? Character.toLowerCase(name.charAt(0)) : Character.toUpperCase(name.charAt(0))) + name.substring(1));
        }
      }
    }
  } else if (ObjectOutputStream.class.isAssignableFrom(clazz)) {
    // (case: object output stream)
    try {
      return (E) new ObjectOutputStream((OutputStream) cast(value, OutputStream.class));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (ObjectInputStream.class.isAssignableFrom(clazz)) {
    // (case: object input stream)
    try {
      return (E) new ObjectInputStream((InputStream) cast(value, InputStream.class));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (PrintStream.class.isAssignableFrom(clazz)) {
    // (case: print stream)
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) System.out;
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) System.err;
    }
    try {
      return (E) new PrintStream(new FileOutputStream(value));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (PrintWriter.class.isAssignableFrom(clazz)) {
    // (case: print writer)
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) new PrintWriter(System.out);
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) new PrintWriter(System.err);
    }
    try {
      return (E) IOUtils.getPrintWriter(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (OutputStream.class.isAssignableFrom(clazz)) {
    // (case: output stream)
    if (value.equalsIgnoreCase("stdout") || value.equalsIgnoreCase("out")) {
      return (E) System.out;
    }
    if (value.equalsIgnoreCase("stderr") || value.equalsIgnoreCase("err")) {
      return (E) System.err;
    }
    File toWriteTo = cast(value, File.class);
    try {
      // make sure the target file exists (creating it if necessary) before opening the stream
      if (toWriteTo == null || (!toWriteTo.exists() && !toWriteTo.createNewFile())) {
        throw new IllegalStateException("Could not create output stream (cannot write file): " + value);
      }
      return (E) IOUtils.getFileOutputStream(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else if (InputStream.class.isAssignableFrom(clazz)) {
    // (case: input stream)
    if (value.equalsIgnoreCase("stdin") || value.equalsIgnoreCase("in")) {
      return (E) System.in;
    }
    try {
      return (E) IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(value);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  } else {
    try {
      // (case: can parse from string)
      Method decode = clazz.getMethod("fromString", String.class);
      return (E) decode.invoke(MetaClass.create(clazz), value);
    } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | ClassCastException e) {
      // Silent errors for misc failures
    }
    // Pass 2: Guess what the object could be
    if (Tree.class.isAssignableFrom(clazz)) {
      // (case: reading a tree)
      try {
        return (E) new PennTreeReader(new StringReader(value), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    } else if (Collection.class.isAssignableFrom(clazz)) {
      // (case: reading a collection)
      Collection rtn;
      if (Modifier.isAbstract(clazz.getModifiers())) {
        rtn = abstractToConcreteCollectionMap.get(clazz).createInstance();
      } else {
        rtn = MetaClass.create(clazz).createInstance();
      }
      Class<?> subType = clazz.getComponentType();
      String[] strings = StringUtils.decodeArray(value);
      for (String string : strings) {
        if (subType == null) {
          rtn.add(castWithoutKnowingType(string));
        } else {
          rtn.add(cast(string, subType));
        }
      }
      return (E) rtn;
    } else {
      // We could not cast this object
      return null;
    }
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class HashIndex method loadFromFilename.
/**
 * Builds an index from a file whose lines have the form (number=value). The values are added
 * in the order in which their lines appear in the file.
 * Warning: the number before the '=' is ignored; each value is simply indexed in turn!
 * Lines without an '=' and lines with nothing after the first '=' are skipped.
 *
 * @param file Which file to load
 * @return An index built out of the lines in the file
 */
public static Index<String> loadFromFilename(String file) {
  Index<String> loaded = new HashIndex<>();
  try (BufferedReader reader = IOUtils.readerFromString(file)) {
    String line = reader.readLine();
    while (line != null) {
      int separator = line.indexOf('=');
      // only lines with at least one character after the first '=' carry a value
      boolean hasValue = separator >= 0 && separator < line.length() - 1;
      if (hasValue) {
        loaded.add(line.substring(separator + 1));
      }
      line = reader.readLine();
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
  return loaded;
}
Aggregations