SAXParser
Read UTF-8 XML File in Java using SAX parser example
In the previous SAX parser tutorial we saw how to parse and read a simple XML file. If your file is encoded in UTF-8, there is a chance that the parser throws a MalformedByteSequenceException
. In order to solve this you have to set the InputSource
encoding to UTF-8.
You can do this with the following code:
// Decode the file's bytes as UTF-8 explicitly instead of relying on the platform default charset.
InputStream inputStream = new FileInputStream(xmlFile);
InputStreamReader inputReader = new InputStreamReader(inputStream, "UTF-8");
// Hand the parser a character stream and record the encoding on the same InputSource instance.
InputSource inputSource = new InputSource(inputReader);
inputSource.setEncoding("UTF-8");
Here is the XML file we are going to use for our demo. It contains the non-ASCII character ©, which is stored as a multi-byte sequence in UTF-8.
testFile.xml:
<?xml version="1.0" encoding="UTF-8" standalone="no"?><company> <employee id="10"> <firstname>Jeremy</firstname> <lastname>Harley</lastname> <email>james@example.org</email> <department>Human Resources</department> <salary>2000000</salary> <address>34 Stanley St.©</address> </employee> <employee id="2"> <firstname>John</firstname> <lastname>May</lastname> <email>john@example.org</email> <department>Logistics</department> <salary>400</salary> <address>123 Stanley St.</address> </employee> </company>
MyHandler.java:
package com.javacodegeeks.java.core; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class MyHandler extends DefaultHandler { boolean tagFname = false; boolean tagLname = false; boolean tagEmail = false; boolean tagDep = false; boolean tagSalary = false; boolean tagAddress = false; public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (attributes.getLength() > 0) { String tag = "<" + qName; for (int i = 0; i < attributes.getLength(); i++) { tag += " " + attributes.getLocalName(i) + "=" + attributes.getValue(i); } tag += ">"; System.out.println(tag); } else { System.out.println("<" + qName + ">"); } if (qName.equalsIgnoreCase("firstname")) { tagFname = true; } if (qName.equalsIgnoreCase("lastname")) { tagLname = true; } if (qName.equalsIgnoreCase("email")) { tagEmail = true; } if (qName.equalsIgnoreCase("department")) { tagDep = true; } if (qName.equalsIgnoreCase("salary")) { tagSalary = true; } if (qName.equalsIgnoreCase("address")) { tagAddress = true; } } public void characters(char ch[], int start, int length) throws SAXException { if (tagFname) { System.out.println(new String(ch, start, length)); tagFname = false; } if (tagLname) { System.out.println(new String(ch, start, length)); tagLname = false; } if (tagEmail) { System.out.println(new String(ch, start, length)); tagEmail = false; } if (tagDep) { System.out.println(new String(ch, start, length)); tagDep = false; } if (tagSalary) { System.out.println(new String(ch, start, length)); tagSalary = false; } if (tagAddress) { System.out.println(new String(ch, start, length)); tagAddress = false; } } public void endElement(String uri, String localName, String qName) throws SAXException { System.out.println("</" + qName + ">"); } }
ParseUTF8XMLFileWithSAX.java:
package com.javacodegeeks.java.core;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;

/**
 * Demo entry point: parses a UTF-8 encoded XML file with a SAX parser and lets
 * {@link MyHandler} print its content.
 */
public class ParseUTF8XMLFileWithSAX {

    /** File parsed when no path is given on the command line. */
    private static final String xmlFilePath = "C:\\Users\\nikos7\\Desktop\\filesForExamples\\testFile.xml";

    /**
     * Parses the XML file and prints its elements to standard output.
     *
     * @param argv optional; if present, {@code argv[0]} is the path of the XML file to parse,
     *             otherwise the built-in default path is used
     */
    public static void main(String argv[]) {
        String path = (argv != null && argv.length > 0) ? argv[0] : xmlFilePath;

        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            File xmlFile = new File(path);

            // try-with-resources closes the file even when parsing fails.
            try (InputStream inputStream = new FileInputStream(xmlFile);
                 InputStreamReader inputReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {

                // The reader already decodes the bytes as UTF-8; the encoding is also
                // recorded on the InputSource so both agree.
                InputSource inputSource = new InputSource(inputReader);
                inputSource.setEncoding(StandardCharsets.UTF_8.name());

                saxParser.parse(inputSource, new MyHandler());
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report the failure and exit normally.
            e.printStackTrace();
        }
    }
}
Output:
<company>
<employee id=10>
<firstname>
Jeremy
</firstname>
<lastname>
Harley
</lastname>
<email>
james@example.org
</email>
<department>
Human Resources
</department>
<salary>
2000000
</salary>
<address>
34 Stanley St.©
</address>
</employee>
<employee id=2>
<firstname>
John
</firstname>
<lastname>
May
</lastname>
<email>
john@example.org
</email>
<department>
Logistics
</department>
<salary>
400
</salary>
<address>
123 Stanley St.
</address>
</employee>
</company>
This was an example of how to read a UTF-8 XML file in Java using a SAX parser.