Learn how to process XML documents with Java in this sample chapter from Core Web Programming. Larry Brown and Marty Hall show you how to use Java to process XML documents by using the Document Object Model (DOM), the Simple API for XML (SAX), and the Extensible Stylesheet Language for Transformations (XSLT).
SAX Example 1: Printing the Outline of an XML Document
Listing 23.7 shows a content handler that responds to three parts of an XML document: start tags, end tags, and tag bodies. It overrides the startElement, endElement, and characters methods to accomplish this. The handler simply prints out the start tag, the end tag, and the first word of the tag body, with two spaces of indentation for each nesting level. To accomplish this task, the content handler overrides the following three methods:
*
startElement
This method prints a message indicating that it found the start tag for the element name. Any attributes associated with the element are listed in parentheses. The method also puts spaces in front of the printout, as specified by the indentation variable (initially 0). Finally, it adds 2 to this variable.
*
endElement
This method subtracts 2 from the indentation variable and then prints a message indicating that it found the end tag for the element.
*
characters
This method prints the first word of the tag body, leaving the indentation level unchanged.
Listing 23.8 shows a program that lets the user specify a SAX-compliant parser and an XML file, then invokes the parser with the outline-printing content handler just described (and shown in Listing 23.7). Figure 23–4 shows the initial result, and Listing 23.6 shows the top part of the output when orders.xml (Listing 23.9) is selected.
Figure 23–4 Interactively selecting the orders.xml file.
Listing 23.6 Partial output of SAXPrinter applied to orders.xml
Start tag: orders
  Start tag: order
    Start tag: count
      37
    End tag: count
    Start tag: price
      49.99
    End tag: price
    Start tag: book
      Start tag: isbn
        0130897930
      End tag: isbn
      Start tag: title
        Core...
      End tag: title
      Start tag: authors
        Start tag: author
          Marty...
        End tag: author
        Start tag: author
          Larry...
        End tag: author
      End tag: authors
    End tag: book
  End tag: order
  Start tag: order
    Start tag: count
      1
    End tag: count
    Start tag: price
      9.95
    End tag: price
    Start tag: yacht
      Start tag: manufacturer
        Luxury...
      End tag: manufacturer
      Start tag: model
        M-1
      End tag: model
      Start tag: standardFeatures (oars=plastic, lifeVests=none)
        false
      End tag: standardFeatures
    End tag: yacht
  End tag: order
  ... (Rest of results omitted)
End tag: orders
Listing 23.7 PrintHandler.java
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.util.StringTokenizer;
/** A SAX handler that prints out the start tags, end tags,
* and first word of tag body. Indents two spaces
* for each nesting level.
*/
/** A SAX handler that prints out the start tags, end tags,
 *  and first word of tag body. Indents two spaces
 *  for each nesting level.
 *  <p>
 *  A SAX parser is allowed to report one run of character
 *  data through several calls to {@link #characters}, so the
 *  text is collected first and its first word is printed
 *  only when the next start or end tag is seen. This way
 *  each tag body yields at most one line of output no
 *  matter how the parser chunks it.
 */
public class PrintHandler extends DefaultHandler {
  /** Number of spaces added for each nesting level. */
  private static final int INDENT_STEP = 2;

  /** Number of spaces to print in front of the next line. */
  private int indentation = 0;

  /** Character data seen since the last start or end tag. */
  private final StringBuilder pendingText = new StringBuilder();

  /** When you see a start tag, print it out and then
   *  increase indentation by two spaces. If the
   *  element has attributes, place them in parens
   *  after the element name. Any text collected before
   *  this tag is printed first.
   */
  @Override
  public void startElement(String namespaceUri,
                           String localName,
                           String qualifiedName,
                           Attributes attributes)
      throws SAXException {
    printPendingText();
    indent(indentation);
    System.out.print("Start tag: " + qualifiedName);
    int numAttributes = attributes.getLength();
    // For <someTag> just print out "someTag". But for
    // <someTag att1="Val1" att2="Val2">, print out
    // "someTag (att1=Val1, att2=Val2)".
    if (numAttributes > 0) {
      System.out.print(" (");
      for (int i = 0; i < numAttributes; i++) {
        if (i > 0) {
          System.out.print(", ");
        }
        System.out.print(attributes.getQName(i) + "=" +
                         attributes.getValue(i));
      }
      System.out.print(")");
    }
    System.out.println();
    indentation = indentation + INDENT_STEP;
  }

  /** When you see the end tag, print it out and decrease
   *  indentation level by 2. Any text collected inside the
   *  element is printed first, still at the body's
   *  indentation level.
   */
  @Override
  public void endElement(String namespaceUri,
                         String localName,
                         String qualifiedName)
      throws SAXException {
    printPendingText();
    indentation = indentation - INDENT_STEP;
    indent(indentation);
    System.out.println("End tag: " + qualifiedName);
  }

  /** Collect a chunk of tag body text. Nothing is printed
   *  here; the first word is printed when the next start or
   *  end tag arrives.
   *
   *  @param chars buffer holding the character data
   *  @param startIndex index of the first character to use
   *  @param length number of characters to use (a count,
   *         not an end index)
   */
  @Override
  public void characters(char[] chars,
                         int startIndex,
                         int length) {
    pendingText.append(chars, startIndex, length);
  }

  /** Print the first word of the collected text, followed by
   *  "..." if more words follow, then forget the text.
   *  Whitespace-only text prints nothing.
   */
  private void printPendingText() {
    if (pendingText.length() == 0) {
      return;
    }
    // Whitespace makes up default StringTokenizer delimiters
    StringTokenizer tok =
      new StringTokenizer(pendingText.toString());
    pendingText.setLength(0);
    if (tok.hasMoreTokens()) {
      indent(indentation);
      System.out.print(tok.nextToken());
      if (tok.hasMoreTokens()) {
        System.out.println("...");
      } else {
        System.out.println();
      }
    }
  }

  /** Print the given number of spaces without a newline. */
  private void indent(int indentation) {
    for (int i = 0; i < indentation; i++) {
      System.out.print(" ");
    }
  }
}
Listing 23.8 SAXPrinter.java
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/** A program that uses SAX to print out the start tags,
* end tags, and first word of tag body of an XML file.
*/
/** A program that uses SAX to print out the start tags,
 *  end tags, and first word of tag body of an XML file.
 *  <p>
 *  The file to parse is the first command-line argument.
 *  With no argument, the name comes from
 *  ExtensionFileFilter.getFileName (presumably an
 *  interactive file chooser -- that helper is defined
 *  elsewhere), falling back to "test.xml" when it
 *  returns null.
 */
public class SAXPrinter {
  public static void main(String[] args) {
    String jaxpPropertyName =
      "javax.xml.parsers.SAXParserFactory";
    // Pass the parser factory in on the command line with
    // -D to override the use of the Apache parser.
    if (System.getProperty(jaxpPropertyName) == null) {
      String apacheXercesPropertyValue =
        "org.apache.xerces.jaxp.SAXParserFactoryImpl";
      // Prefer Xerces only when it is really on the class
      // path. Naming a factory class that cannot be loaded
      // makes SAXParserFactory.newInstance() fail outright;
      // leaving the property unset lets JAXP pick whatever
      // parser the runtime provides.
      if (isClassAvailable(apacheXercesPropertyValue)) {
        System.setProperty(jaxpPropertyName,
                           apacheXercesPropertyValue);
      }
    }
    String filename;
    if (args.length > 0) {
      filename = args[0];
    } else {
      String[] extensions = { "xml", "tld" };
      WindowUtilities.setNativeLookAndFeel();
      filename = ExtensionFileFilter.getFileName(".",
                                                 "XML Files",
                                                 extensions);
      if (filename == null) {
        filename = "test.xml";
      }
    }
    printOutline(filename);
    System.exit(0);
  }

  /** Parse the named XML file with a PrintHandler, which
   *  prints the document outline to standard output.
   *  Failures are reported on standard error and are not
   *  rethrown.
   *
   *  @param filename the file (or URI) handed to the parser
   */
  public static void printOutline(String filename) {
    DefaultHandler handler = new PrintHandler();
    try {
      // Created inside the try so that a misconfigured
      // parser factory is reported instead of escaping.
      SAXParserFactory factory = SAXParserFactory.newInstance();
      SAXParser parser = factory.newSAXParser();
      parser.parse(filename, handler);
    } catch(FactoryConfigurationError fce) {
      // An Error, not an Exception, so it needs its own
      // clause. Typically means the factory class named by
      // the -D property could not be loaded.
      String errorMessage =
        "Error creating SAX parser factory: " + fce;
      System.err.println(errorMessage);
      fce.printStackTrace();
    } catch(Exception e) {
      String errorMessage =
        "Error parsing " + filename + ": " + e;
      System.err.println(errorMessage);
      e.printStackTrace();
    }
  }

  /** Report whether the named class can be loaded, without
   *  initializing it.
   */
  private static boolean isClassAvailable(String className) {
    try {
      Class.forName(className, false,
                    SAXPrinter.class.getClassLoader());
      return true;
    } catch(ClassNotFoundException cnfe) {
      return false;
    } catch(LinkageError le) {
      return false;
    }
  }
}
Listing 23.9 orders.xml
<?xml version="1.0" ?>
<orders>
<order>
<count>37</count>
<price>49.99</price>
<book>
<isbn>0130897930</isbn>
<title>Core Web Programming Second Edition</title>
<authors>
<author>Marty Hall</author>
<author>Larry Brown</author>
</authors>
</book>
</order>
<order>
<count>1</count>
<price>9.95</price>
<yacht>
<manufacturer>Luxury Yachts, Inc.</manufacturer>
<model>M-1</model>
<standardFeatures oars="plastic"
lifeVests="none">
false
</standardFeatures>
</yacht>
</order>
<order>
<count>3</count>
<price>22.22</price>
<book>
<isbn>B000059Z4H</isbn>
<title>Harry Potter and the Order of the Phoenix</title>
<authors>
<author>J.K. Rowling</author>
</authors>
</book>
</order>
<order>
<count>2</count>
<price>10452689.01</price>
<yacht>
<manufacturer>We B Boats, Inc.</manufacturer>
<model>236-A</model>
<standardFeatures bowlingAlley="double"
tennisCourt="grass">
true
</standardFeatures>
</yacht>
</order>
<order>
<count>13</count>
<price>49.99</price>
<book>
<isbn>0130897930</isbn>
<title>Core Web Programming Second Edition</title>
<authors>
<author>Marty Hall</author>
<author>Larry Brown</author>
</authors>
</book>
</order>
</orders>
SAX Example 1: Printing the Outline of an XML Document
Listing 23.7 shows a content handler that responds to three parts of an XML document: start tags, end tags, and tag bodies. It overrides the startElement, endElement, and characters methods to accomplish this. The handler simply prints out the start tag, the end tag, and the first word of the tag body, with two spaces of indentation for each nesting level. To accomplish this task, the content handler overrides the following three methods:
*
startElement
This method prints a message indicating that it found the start tag for the element name. Any attributes associated with the element are listed in parentheses. The method also puts spaces in front of the printout, as specified by the indentation variable (initially 0). Finally, it adds 2 to this variable.
*
endElement
This method subtracts 2 from the indentation variable and then prints a message indicating that it found the end tag for the element.
*
characters
This method prints the first word of the tag body, leaving the indentation level unchanged.
Listing 23.8 shows a program that lets the user specify a SAX-compliant parser and an XML file, then invokes the parser with the outline-printing content handler just described (and shown in Listing 23.7). Figure 23–4 shows the initial result, and Listing 23.6 shows the top part of the output when orders.xml (Listing 23.9) is selected.
Figure 23–4 Interactively selecting the orders.xml file.
Listing 23.6 Partial output of SAXPrinter applied to orders.xml
Start tag: orders
  Start tag: order
    Start tag: count
      37
    End tag: count
    Start tag: price
      49.99
    End tag: price
    Start tag: book
      Start tag: isbn
        0130897930
      End tag: isbn
      Start tag: title
        Core...
      End tag: title
      Start tag: authors
        Start tag: author
          Marty...
        End tag: author
        Start tag: author
          Larry...
        End tag: author
      End tag: authors
    End tag: book
  End tag: order
  Start tag: order
    Start tag: count
      1
    End tag: count
    Start tag: price
      9.95
    End tag: price
    Start tag: yacht
      Start tag: manufacturer
        Luxury...
      End tag: manufacturer
      Start tag: model
        M-1
      End tag: model
      Start tag: standardFeatures (oars=plastic, lifeVests=none)
        false
      End tag: standardFeatures
    End tag: yacht
  End tag: order
  ... (Rest of results omitted)
End tag: orders
Listing 23.7 PrintHandler.java
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.util.StringTokenizer;
/** A SAX handler that prints out the start tags, end tags,
* and first word of tag body. Indents two spaces
* for each nesting level.
*/
/** A SAX handler that prints out the start tags, end tags,
 *  and first word of tag body. Indents two spaces
 *  for each nesting level.
 *  <p>
 *  A SAX parser is allowed to report one run of character
 *  data through several calls to {@link #characters}, so the
 *  text is collected first and its first word is printed
 *  only when the next start or end tag is seen. This way
 *  each tag body yields at most one line of output no
 *  matter how the parser chunks it.
 */
public class PrintHandler extends DefaultHandler {
  /** Number of spaces added for each nesting level. */
  private static final int INDENT_STEP = 2;

  /** Number of spaces to print in front of the next line. */
  private int indentation = 0;

  /** Character data seen since the last start or end tag. */
  private final StringBuilder pendingText = new StringBuilder();

  /** When you see a start tag, print it out and then
   *  increase indentation by two spaces. If the
   *  element has attributes, place them in parens
   *  after the element name. Any text collected before
   *  this tag is printed first.
   */
  @Override
  public void startElement(String namespaceUri,
                           String localName,
                           String qualifiedName,
                           Attributes attributes)
      throws SAXException {
    printPendingText();
    indent(indentation);
    System.out.print("Start tag: " + qualifiedName);
    int numAttributes = attributes.getLength();
    // For <someTag> just print out "someTag". But for
    // <someTag att1="Val1" att2="Val2">, print out
    // "someTag (att1=Val1, att2=Val2)".
    if (numAttributes > 0) {
      System.out.print(" (");
      for (int i = 0; i < numAttributes; i++) {
        if (i > 0) {
          System.out.print(", ");
        }
        System.out.print(attributes.getQName(i) + "=" +
                         attributes.getValue(i));
      }
      System.out.print(")");
    }
    System.out.println();
    indentation = indentation + INDENT_STEP;
  }

  /** When you see the end tag, print it out and decrease
   *  indentation level by 2. Any text collected inside the
   *  element is printed first, still at the body's
   *  indentation level.
   */
  @Override
  public void endElement(String namespaceUri,
                         String localName,
                         String qualifiedName)
      throws SAXException {
    printPendingText();
    indentation = indentation - INDENT_STEP;
    indent(indentation);
    System.out.println("End tag: " + qualifiedName);
  }

  /** Collect a chunk of tag body text. Nothing is printed
   *  here; the first word is printed when the next start or
   *  end tag arrives.
   *
   *  @param chars buffer holding the character data
   *  @param startIndex index of the first character to use
   *  @param length number of characters to use (a count,
   *         not an end index)
   */
  @Override
  public void characters(char[] chars,
                         int startIndex,
                         int length) {
    pendingText.append(chars, startIndex, length);
  }

  /** Print the first word of the collected text, followed by
   *  "..." if more words follow, then forget the text.
   *  Whitespace-only text prints nothing.
   */
  private void printPendingText() {
    if (pendingText.length() == 0) {
      return;
    }
    // Whitespace makes up default StringTokenizer delimiters
    StringTokenizer tok =
      new StringTokenizer(pendingText.toString());
    pendingText.setLength(0);
    if (tok.hasMoreTokens()) {
      indent(indentation);
      System.out.print(tok.nextToken());
      if (tok.hasMoreTokens()) {
        System.out.println("...");
      } else {
        System.out.println();
      }
    }
  }

  /** Print the given number of spaces without a newline. */
  private void indent(int indentation) {
    for (int i = 0; i < indentation; i++) {
      System.out.print(" ");
    }
  }
}
Listing 23.8 SAXPrinter.java
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/** A program that uses SAX to print out the start tags,
* end tags, and first word of tag body of an XML file.
*/
/** A program that uses SAX to print out the start tags,
 *  end tags, and first word of tag body of an XML file.
 *  <p>
 *  The file to parse is the first command-line argument.
 *  With no argument, the name comes from
 *  ExtensionFileFilter.getFileName (presumably an
 *  interactive file chooser -- that helper is defined
 *  elsewhere), falling back to "test.xml" when it
 *  returns null.
 */
public class SAXPrinter {
  public static void main(String[] args) {
    String jaxpPropertyName =
      "javax.xml.parsers.SAXParserFactory";
    // Pass the parser factory in on the command line with
    // -D to override the use of the Apache parser.
    if (System.getProperty(jaxpPropertyName) == null) {
      String apacheXercesPropertyValue =
        "org.apache.xerces.jaxp.SAXParserFactoryImpl";
      // Prefer Xerces only when it is really on the class
      // path. Naming a factory class that cannot be loaded
      // makes SAXParserFactory.newInstance() fail outright;
      // leaving the property unset lets JAXP pick whatever
      // parser the runtime provides.
      if (isClassAvailable(apacheXercesPropertyValue)) {
        System.setProperty(jaxpPropertyName,
                           apacheXercesPropertyValue);
      }
    }
    String filename;
    if (args.length > 0) {
      filename = args[0];
    } else {
      String[] extensions = { "xml", "tld" };
      WindowUtilities.setNativeLookAndFeel();
      filename = ExtensionFileFilter.getFileName(".",
                                                 "XML Files",
                                                 extensions);
      if (filename == null) {
        filename = "test.xml";
      }
    }
    printOutline(filename);
    System.exit(0);
  }

  /** Parse the named XML file with a PrintHandler, which
   *  prints the document outline to standard output.
   *  Failures are reported on standard error and are not
   *  rethrown.
   *
   *  @param filename the file (or URI) handed to the parser
   */
  public static void printOutline(String filename) {
    DefaultHandler handler = new PrintHandler();
    try {
      // Created inside the try so that a misconfigured
      // parser factory is reported instead of escaping.
      SAXParserFactory factory = SAXParserFactory.newInstance();
      SAXParser parser = factory.newSAXParser();
      parser.parse(filename, handler);
    } catch(FactoryConfigurationError fce) {
      // An Error, not an Exception, so it needs its own
      // clause. Typically means the factory class named by
      // the -D property could not be loaded.
      String errorMessage =
        "Error creating SAX parser factory: " + fce;
      System.err.println(errorMessage);
      fce.printStackTrace();
    } catch(Exception e) {
      String errorMessage =
        "Error parsing " + filename + ": " + e;
      System.err.println(errorMessage);
      e.printStackTrace();
    }
  }

  /** Report whether the named class can be loaded, without
   *  initializing it.
   */
  private static boolean isClassAvailable(String className) {
    try {
      Class.forName(className, false,
                    SAXPrinter.class.getClassLoader());
      return true;
    } catch(ClassNotFoundException cnfe) {
      return false;
    } catch(LinkageError le) {
      return false;
    }
  }
}
Listing 23.9 orders.xml
<?xml version="1.0" ?>
<orders>
<order>
<count>37</count>
<price>49.99</price>
<book>
<isbn>0130897930</isbn>
<title>Core Web Programming Second Edition</title>
<authors>
<author>Marty Hall</author>
<author>Larry Brown</author>
</authors>
</book>
</order>
<order>
<count>1</count>
<price>9.95</price>
<yacht>
<manufacturer>Luxury Yachts, Inc.</manufacturer>
<model>M-1</model>
<standardFeatures oars="plastic"
lifeVests="none">
false
</standardFeatures>
</yacht>
</order>
<order>
<count>3</count>
<price>22.22</price>
<book>
<isbn>B000059Z4H</isbn>
<title>Harry Potter and the Order of the Phoenix</title>
<authors>
<author>J.K. Rowling</author>
</authors>
</book>
</order>
<order>
<count>2</count>
<price>10452689.01</price>
<yacht>
<manufacturer>We B Boats, Inc.</manufacturer>
<model>236-A</model>
<standardFeatures bowlingAlley="double"
tennisCourt="grass">
true
</standardFeatures>
</yacht>
</order>
<order>
<count>13</count>
<price>49.99</price>
<book>
<isbn>0130897930</isbn>
<title>Core Web Programming Second Edition</title>
<authors>
<author>Marty Hall</author>
<author>Larry Brown</author>
</authors>
</book>
</order>
</orders>