use of java.nio.charset.CharacterCodingException in project elasticsearch by elastic.
the class Analysis method getReaderFromFile.
/**
 * Opens a UTF-8 {@link Reader} for the file whose path is stored under the
 * setting key <code>settingPrefix</code>. The configured value is resolved
 * against <code>env.configFile()</code>. The returned reader is not closed by
 * this method.
 *
 * @param env           environment whose config location is used to resolve the path
 * @param settings      settings to read the file path from
 * @param settingPrefix key of the setting that holds the file path
 * @return a reader over the configured file, or <code>null</code> if no value
 *         is set for <code>settingPrefix</code>.
 * @throws IllegalArgumentException
 *          If the Reader can not be instantiated.
 */
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix) {
    String filePath = settings.get(settingPrefix, null);
    if (filePath == null) {
        return null;
    }
    final Path path = env.configFile().resolve(filePath);
    try {
        return Files.newBufferedReader(path, StandardCharsets.UTF_8);
    } catch (CharacterCodingException ex) {
        // The path was read from settingPrefix itself, so report that key verbatim
        // instead of appending a "_path" suffix that was never part of the lookup.
        String message = String.format(Locale.ROOT,
            "Unsupported character encoding detected while reading %s: %s - files must be UTF-8 encoded",
            settingPrefix, path.toString());
        throw new IllegalArgumentException(message, ex);
    } catch (IOException ioe) {
        String message = String.format(Locale.ROOT, "IOException while reading %s: %s", settingPrefix, path.toString());
        throw new IllegalArgumentException(message, ioe);
    }
}
use of java.nio.charset.CharacterCodingException in project elasticsearch by elastic.
the class Analysis method getWordList.
/**
 * Fetches a list of words from the given settings. If a file path is configured
 * under <code>settingPrefix + "_path"</code>, it is resolved against
 * <code>env.configFile()</code> and the file is read as UTF-8 through
 * <code>loadWordList(reader, "#")</code>. Otherwise the array stored directly
 * under <code>settingPrefix</code> is used.
 *
 * @return the word list, or <code>null</code> if neither key is set.
 * @throws IllegalArgumentException
 *          If the configured file cannot be read or is not valid UTF-8.
 */
public static List<String> getWordList(Environment env, Settings settings, String settingPrefix) {
    final String configuredPath = settings.get(settingPrefix + "_path", null);
    if (configuredPath != null) {
        // A file is configured: it takes precedence over any inline list.
        final Path wordFile = env.configFile().resolve(configuredPath);
        try (BufferedReader in = Files.newBufferedReader(wordFile, StandardCharsets.UTF_8)) {
            return loadWordList(in, "#");
        } catch (CharacterCodingException encodingFailure) {
            throw new IllegalArgumentException(
                String.format(Locale.ROOT,
                    "Unsupported character encoding detected while reading %s_path: %s - files must be UTF-8 encoded",
                    settingPrefix, wordFile.toString()),
                encodingFailure);
        } catch (IOException readFailure) {
            throw new IllegalArgumentException(
                String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, wordFile.toString()),
                readFailure);
        }
    }
    // No file configured: fall back to the words listed inline, if any.
    final String[] inlineWords = settings.getAsArray(settingPrefix, null);
    return inlineWords == null ? null : Arrays.asList(inlineWords);
}
use of java.nio.charset.CharacterCodingException in project elasticsearch by elastic.
the class AnalysisTests method testParseFalseEncodedFile.
/**
 * Checks that a word-list file containing bytes that are not valid UTF-8 makes
 * Analysis.getWordList fail with an IllegalArgumentException whose message names
 * the setting and the file, and whose cause is a character-coding failure.
 */
public void testParseFalseEncodedFile() throws IOException {
    final Path home = createTempDir();
    final Path dictFile = home.resolve("foo.dict");
    final Settings nodeSettings = Settings.builder()
        .put("foo.bar_path", dictFile)
        .put(Environment.PATH_HOME_SETTING.getKey(), home)
        .build();
    try (OutputStream out = Files.newOutputStream(dictFile)) {
        // some invalid UTF-8, terminated by a newline
        out.write(new byte[] { (byte) 0xff, 0x00, 0x00, '\n' });
    }
    final Environment env = new Environment(nodeSettings);

    final IllegalArgumentException failure = expectThrows(IllegalArgumentException.class,
        () -> Analysis.getWordList(env, nodeSettings, "foo.bar"));

    final String expectedMessage = "Unsupported character encoding detected while reading foo.bar_path: "
        + dictFile.toString() + " - files must be UTF-8 encoded";
    assertEquals(expectedMessage, failure.getMessage());

    final Throwable cause = failure.getCause();
    assertTrue(cause.toString(), cause instanceof MalformedInputException || cause instanceof CharacterCodingException);
}
use of java.nio.charset.CharacterCodingException in project hive by apache.
the class VectorUDFUnixTimeStampString method doGetField.
/**
 * Decodes the given byte range with {@code Text.decode}, parses the resulting
 * string with {@code format}, and returns the parsed time in whole seconds
 * (calendar milliseconds divided by 1000).
 *
 * @param bytes  buffer holding the encoded date string
 * @param start  offset of the first byte to decode
 * @param length number of bytes to decode
 * @return the parsed time in seconds
 * @throws ParseException if the bytes cannot be decoded; the decoding failure is
 *         attached as the cause. Presumably also thrown by {@code format.parse}
 *         for unparseable text.
 */
@Override
protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
    final Date date;
    try {
        date = format.parse(Text.decode(bytes, start, length));
    } catch (CharacterCodingException e) {
        // ParseException has no cause-taking constructor, so attach the cause explicitly
        // rather than losing the original stack trace.
        ParseException parseFailure = new ParseException(e.getMessage(), 0);
        parseFailure.initCause(e);
        throw parseFailure;
    }
    calendar.setTime(date);
    return calendar.getTimeInMillis() / 1000;
}
use of java.nio.charset.CharacterCodingException in project hive by apache.
the class VectorUDFWeekOfYearString method doGetField.
/**
 * Decodes the given byte range with {@code Text.decode}, parses the resulting
 * string with {@code format}, and returns the {@link Calendar#WEEK_OF_YEAR}
 * field of the parsed date.
 *
 * @param bytes  buffer holding the encoded date string
 * @param start  offset of the first byte to decode
 * @param length number of bytes to decode
 * @return the week of year of the parsed date
 * @throws ParseException if the bytes cannot be decoded; the decoding failure is
 *         attached as the cause. Presumably also thrown by {@code format.parse}
 *         for unparseable text.
 */
@Override
protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
    final Date date;
    try {
        String decoded = Text.decode(bytes, start, length);
        date = format.parse(decoded);
    } catch (CharacterCodingException e) {
        // ParseException has no cause-taking constructor, so attach the cause explicitly
        // rather than losing the original stack trace.
        ParseException parseFailure = new ParseException(e.getMessage(), 0);
        parseFailure.initCause(e);
        throw parseFailure;
    }
    calendar.setTime(date);
    return calendar.get(Calendar.WEEK_OF_YEAR);
}
Aggregations