当前页面: 开发资料首页 → Java 专题 → Java输入输出过程中的字节流到字符流的转换

Java输入输出过程中的字节流到字符流的转换

摘要: Java输入输出过程中的字节流到字符流的转换

/*
* Copyright (c) 2002-2003 Che, Dong Email: chedongATbigfoot.com/chedongATchedong.com
* $Id: HelloUnicode.java,v 1.3 2003/03/09 08:41:46 chedong Exp $
*/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;

/**
 * Purpose:
 * demonstrates how different encode/decode choices affect multi-byte
 * (Chinese) text.
 * Input:
 * an optional test string given as the first command line argument.
 * Output:
 * test 1 re-encodes/re-decodes the string in several ways and writes each
 * result to a file;
 * test 2 reads those files back and prints what was decoded.
 * @author Che, Dong
 */
class HelloUnicode {
    /**
     * Runs both experiments and prints every intermediate string together with
     * a per-character dump.
     *
     * @param args command line arguments; args[0], when present, replaces the
     *             built-in test string
     */
    public static void main(String[] args) {
        String hello = "Hello world 世界你好";

        // an optional first command line argument overrides the default text
        if (args.length > 0) {
            hello = args[0];
        }

        try {
            /*
             * Experiment 1: start from the test string, transform it in
             * several ways and write each variant to its own file.
             */
            System.out.println(">>>>testing1: write hello world to files<<<<");
            System.out.println("[test 1-1]: with system default encoding="
                + System.getProperty("file.encoding") + "\nstring=" + hello
                + "\tlength=" + hello.length());
            printCharArray(hello);
            writeFile("hello.orig.html", hello);

            // encode with the platform default charset, then decode those bytes as GB2312
            hello = new String(hello.getBytes(), "GB2312");
            System.out.println(
                "[test 1-2]: getBytes with platform default encoding and decoding as gb2312:\nstring="
                + hello + "\tlength=" + hello.length());
            writeFile("hello.gb2312.html", hello);
            printCharArray(hello);

            // encode as UTF8, then decode those bytes with the platform default
            // charset; on a non-UTF8 platform this deliberately yields mojibake
            hello = new String(hello.getBytes("UTF8"));
            System.out.println("[test 1-3]: convert string to UTF8\nstring="
                + hello + "\tlength=" + hello.length());
            writeFile("hello.utf8.html", hello);
            printCharArray(hello);

            /*
             * Experiment 2: read the files produced by experiment 1 back in
             * and show the decoded result.
             */
            System.out.println(
                ">>>>testing2: reading and decoding from files<<<<");

            // first file: read back as-is (readFile decodes with the platform default)
            hello = readFile("hello.orig.html");
            System.out.println(
                "[test 2-1]: read hello.orig.html: decoding with system default encoding\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);

            // second file: also read back as-is; no explicit GB2312 re-decoding
            // is applied here even though the label below mentions GB2312
            hello = readFile("hello.gb2312.html");
            System.out.println(
                "[test 2-2]: read hello.gb2312.html: decoding as GB2312\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);

            // third file: re-encode what was read with the platform default
            // charset and decode those bytes as UTF8, undoing test 1-3
            hello = readFile("hello.utf8.html");
            hello = new String(hello.getBytes(), "UTF8");
            System.out.println(
                "[test 2-3]: read hello.utf8.html: decoding as UTF8\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);
        } catch (Exception e) {
            // reached for an unsupported charset name, and also for the
            // NullPointerException that follows when readFile returned null
            System.out.println(e.toString());
        }
    }

    /**
     * Prints one line per character of the input: the character itself, its
     * value narrowed to a byte and to a short (decimal and hex), and its
     * Unicode block; a blank line follows the listing.
     *
     * The decimal columns keep the signed value of the narrowing cast. The hex
     * columns are masked to 8 and 16 bits respectively, so a negative value is
     * shown as 93 instead of the sign-extended FFFFFF93 that an unmasked
     * Integer.toHexString call produces.
     *
     * @param inStr input string
     */
    public static void printCharArray(String inStr) {
        char[] myBuffer = inStr.toCharArray();

        for (int i = 0; i < myBuffer.length; i++) {
            byte b = (byte) myBuffer[i];
            short s = (short) myBuffer[i];
            // mask before converting: toHexString takes an int, and a negative
            // byte/short would otherwise be widened with leading F digits
            String hexB = Integer.toHexString(b & 0xFF).toUpperCase();
            String hexS = Integer.toHexString(s & 0xFFFF).toUpperCase();
            StringBuffer sb = new StringBuffer();

            // the character itself
            sb.append("char[");
            sb.append(i);
            sb.append("]='");
            sb.append(myBuffer[i]);
            sb.append("'\t");

            // low 8 bits, as a signed byte
            sb.append("byte=");
            sb.append(b);
            sb.append(" \\u");
            sb.append(hexB);
            sb.append('\t');

            // all 16 bits, as a signed short
            sb.append("short=");
            sb.append(s);
            sb.append(" \\u");
            sb.append(hexS);
            sb.append('\t');

            // Unicode block the character belongs to
            sb.append(Character.UnicodeBlock.of(myBuffer[i]));

            System.out.println(sb.toString());
        }

        System.out.println();
    }

    /**
     * Writes the content to the named file, replacing any previous content.
     * FileWriter encodes with the platform default charset. Failures are
     * printed to standard output and not propagated.
     *
     * @param fileName output file name
     * @param content file content to write
     */
    private static void writeFile(String fileName, String content) {
        FileWriter fw = null;

        try {
            // opening without the append flag truncates an existing file, so
            // stale content can never survive a failed delete
            fw = new FileWriter(fileName);
            fw.write(content);
        } catch (Exception e) {
            System.out.println(e.toString());
        } finally {
            // close on every path so the handle is released even if write failed
            if (fw != null) {
                try {
                    fw.close();
                } catch (java.io.IOException e) {
                    System.out.println(e.toString());
                }
            }
        }
    }

    /**
     * Reads the named file line by line and returns the lines concatenated
     * without their line terminators. FileReader decodes with the platform
     * default charset.
     *
     * @param fileName input file name
     * @return String file content, or null when the file could not be read
     *         (the exception is printed to standard output)
     */
    private static String readFile(String fileName) {
        BufferedReader fr = null;

        try {
            fr = new BufferedReader(new FileReader(fileName));

            StringBuffer out = new StringBuffer();
            String thisLine;

            while ((thisLine = fr.readLine()) != null) {
                out.append(thisLine);
            }

            return out.toString();
        } catch (Exception e) {
            System.out.println(e.toString());

            return null;
        } finally {
            // close on every path so the handle is released even if readLine failed
            if (fr != null) {
                try {
                    fr.close();
                } catch (java.io.IOException e) {
                    System.out.println(e.toString());
                }
            }
        }
    }
}

运行结果：

C:\ja>java HelloUnicode
>>>>testing1: write hello world to files<<<<
[test 1-1]: with system default encoding=GBK
string=Hello world 世界你好 length=16
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='世' byte=22 \u16 short=19990 \u4E16 CJK_UNIFIED_IDEOGRAPHS
char[13]='界' byte=76 \u4C short=30028 \u754C CJK_UNIFIED_IDEOGRAPHS
char[14]='你' byte=96 \u60 short=20320 \u4F60 CJK_UNIFIED_IDEOGRAPHS
char[15]='好' byte=125 \u7D short=22909 \u597D CJK_UNIFIED_IDEOGRAPHS

[test 1-2]: getBytes with platform default encoding and decoding as gb2312:
string=Hello world 世界你好 length=16
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='世' byte=22 \u16 short=19990 \u4E16 CJK_UNIFIED_IDEOGRAPHS
char[13]='界' byte=76 \u4C short=30028 \u754C CJK_UNIFIED_IDEOGRAPHS
char[14]='你' byte=96 \u60 short=20320 \u4F60 CJK_UNIFIED_IDEOGRAPHS
char[15]='好' byte=125 \u7D short=22909 \u597D CJK_UNIFIED_IDEOGRAPHS

[test 1-3]: convert string to UTF8
string=Hello world 涓栫晫浣犲ソ length=18
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='涓' byte=-109 \uFFFFFF93 short=28051 \u6D93 CJK_UNIFIED_IDEOGRAPHS
char[13]='栫' byte=43 \u2B short=26667 \u682B CJK_UNIFIED_IDEOGRAPHS
char[14]='晫' byte=107 \u6B short=26219 \u666B CJK_UNIFIED_IDEOGRAPHS
char[15]='浣' byte=99 \u63 short=28003 \u6D63 CJK_UNIFIED_IDEOGRAPHS
char[16]='犲' byte=-78 \uFFFFFFB2 short=29362 \u72B2 CJK_UNIFIED_IDEOGRAPHS
char[17]='ソ' byte=-67 \uFFFFFFBD short=12477 \u30BD KATAKANA

>>>>testing2: reading and decoding from files<<<<
[test 2-1]: read hello.orig.html: decoding with system default encoding
string=Hello world 世界你好 length=16
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='世' byte=22 \u16 short=19990 \u4E16 CJK_UNIFIED_IDEOGRAPHS
char[13]='界' byte=76 \u4C short=30028 \u754C CJK_UNIFIED_IDEOGRAPHS
char[14]='你' byte=96 \u60 short=20320 \u4F60 CJK_UNIFIED_IDEOGRAPHS
char[15]='好' byte=125 \u7D short=22909 \u597D CJK_UNIFIED_IDEOGRAPHS

[test 2-2]: read hello.gb2312.html: decoding as GB2312
string=Hello world 世界你好 length=16
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='世' byte=22 \u16 short=19990 \u4E16 CJK_UNIFIED_IDEOGRAPHS
char[13]='界' byte=76 \u4C short=30028 \u754C CJK_UNIFIED_IDEOGRAPHS
char[14]='你' byte=96 \u60 short=20320 \u4F60 CJK_UNIFIED_IDEOGRAPHS
char[15]='好' byte=125 \u7D short=22909 \u597D CJK_UNIFIED_IDEOGRAPHS

[test 2-3]: read hello.utf8.html: decoding as UTF8
string=Hello world 世界你好 length=16
char[0]='H' byte=72 \u48 short=72 \u48 BASIC_LATIN
char[1]='e' byte=101 \u65 short=101 \u65 BASIC_LATIN
char[2]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[3]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[4]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[5]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[6]='w' byte=119 \u77 short=119 \u77 BASIC_LATIN
char[7]='o' byte=111 \u6F short=111 \u6F BASIC_LATIN
char[8]='r' byte=114 \u72 short=114 \u72 BASIC_LATIN
char[9]='l' byte=108 \u6C short=108 \u6C BASIC_LATIN
char[10]='d' byte=100 \u64 short=100 \u64 BASIC_LATIN
char[11]=' ' byte=32 \u20 short=32 \u20 BASIC_LATIN
char[12]='世' byte=22 \u16 short=19990 \u4E16 CJK_UNIFIED_IDEOGRAPHS
char[13]='界' byte=76 \u4C short=30028 \u754C CJK_UNIFIED_IDEOGRAPHS
char[14]='你' byte=96 \u60 short=20320 \u4F60 CJK_UNIFIED_IDEOGRAPHS
char[15]='好' byte=125 \u7D short=22909 \u597D CJK_UNIFIED_IDEOGRAPHS

</td> </tr> <tr>

↑返回目录
前一篇: 创建多层目录
后一篇: 绘制立体饼图的程序源码