Use of org.apache.jena.atlas.io.InStreamUTF8 in the Apache Jena project.
Class utf8, method main.
/**
 * Simple program to help hunt down bad UTF-8 encoded characters.
 *
 * <p>Each argument is opened with {@code IO.openFile} and decoded one character at a time
 * through {@code InStreamUTF8}. When no arguments are given, the single name {@code "-"} is
 * used (presumably standard input -- confirm against {@code IO.openFile}).
 *
 * <p>For an input that decodes cleanly, the character count and line number reached are
 * printed. When an {@code AtlasException} is raised (by the decoder, or by the undefined
 * code point check below), the line and column of the offending character are printed
 * instead and scanning continues with the next input.
 *
 * @param args names of the inputs to scan; may be empty
 */
public static void main(String[] args) {
    final long INIT_LINE = 1;
    final long INIT_COL = 1;
    if (args.length == 0) {
        args = new String[] { "-" };
    }
    String label = "";
    for (String fn : args) {
        // Only prefix diagnostics with the file name when several inputs are scanned.
        if (args.length > 1) {
            label = fn + ": ";
        }
        InputStream in = IO.openFile(fn);
        in = new InputStreamBuffered(in);
        long charCount = 0;
        long lineNum = INIT_LINE;
        long colNum = INIT_COL;
        InStreamUTF8 utf8 = null;
        try {
            utf8 = new InStreamUTF8(in);
            for (;;) {
                int ch = utf8.read();
                if (ch == -1) {
                    break;
                }
                // Validate before advancing the position counters so the reported
                // line/column is that of the offending character itself, matching
                // what is reported when utf8.read() throws.
                if (!Character.isDefined(ch)) {
                    throw new AtlasException(String.format("No such codepoint: 0x%04X", ch));
                }
                charCount++;
                if (ch == '\n') {
                    lineNum++;
                    colNum = INIT_COL;
                } else {
                    colNum++;
                }
            }
            System.out.printf("%s: chars = %d , lines = %d\n", fn, charCount, lineNum);
        } catch (AtlasException ex) {
            // The label holds a file name, so pass it as an argument: concatenating it
            // into the format string would misinterpret any '%' it contains.
            System.out.printf("%s[line=%d, col=%d] %s\n", label, lineNum, colNum, ex.getMessage());
        } finally {
            IO.close(utf8);
        }
    }
}
Use of org.apache.jena.atlas.io.InStreamUTF8 in the Apache Jena project.
Class TestStreamUTF8, method testIn.
/**
 * Round-trip check: converts {@code x} to bytes with {@code stringAsBytes}, decodes those
 * bytes again through an {@code InStreamUTF8} using a single bulk read, and asserts that the
 * decoded text equals {@code x}.
 *
 * @param x the string to round-trip
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or closing
 */
static void testIn(String x) {
    try (InStreamUTF8 decoder = new InStreamUTF8(new ByteArrayInputStream(stringAsBytes(x)))) {
        // Deliberately oversized so one read call can hold the whole decoded string.
        char[] decoded = new char[10 * x.length()];
        int count = decoder.read(decoded);
        assertEquals(x, new String(decoded, 0, count));
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Aggregations