Sunday, December 14, 2008

Swing (HTML Parse)

import java.io.*;
import java.net.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;

/**
 * Command-line demo that feeds an HTML document to the Swing HTML parser
 * and prints a structural listing of it via {@code HTMLParseLister}.
 *
 * <p>The single argument is treated as a URL when it contains "://" after
 * its first character, and as a local file name otherwise.
 */
public class HtmlParseDemo {

    /**
     * Parses the document named by {@code args[0]} and lists its structure
     * on standard output. Any failure is reported on standard error.
     *
     * @param args command-line arguments; {@code args[0]} is a URL or file name
     */
    public static void main(String [] args) {
        if (args.length == 0) {
            System.err.println("Usage: java HtmlParseDemo [url | file]");
            System.exit(0);
        }
        String spec = args[0];
        try {
            Reader r = openReader(spec);
            try {
                System.out.println("About to parse " + spec);
                HTMLEditorKit.Parser parser = new ParserDelegator();
                parser.parse(r, new HTMLParseLister(), true);
            }
            finally {
                // Close the reader even when parsing fails, so the file or
                // network stream is not leaked.
                r.close();
            }
        }
        catch (Exception e) {
            System.err.println("Error: " + e);
            e.printStackTrace(System.err);
        }
    }

    /**
     * Opens a character stream for the given URL or file name.
     *
     * @param spec a URL (contains "://" after its first character) or a file name
     * @return a reader over the document's content
     * @throws IOException if the source cannot be opened, or if the URL's
     *         content is neither an {@code InputStream} nor a {@code Reader}
     */
    private static Reader openReader(String spec) throws IOException {
        if (spec.indexOf("://") > 0) {
            Object content = new URL(spec).getContent();
            if (content instanceof InputStream) {
                return new InputStreamReader((InputStream) content);
            }
            if (content instanceof Reader) {
                return (Reader) content;
            }
            throw new IOException("Bad URL content type.");
        }
        return new FileReader(spec);
    }
}

/**
 * Parser callback that prints an indented structural listing of an HTML
 * document to standard output.
 *
 * <p>The HTML parser reports each piece of the document by invoking one of
 * the {@code handle*} methods. A start tag increases the indentation of
 * everything that follows by three columns; the matching end tag restores it.
 */
class HTMLParseLister extends HTMLEditorKit.ParserCallback
{
    /** Current indentation, in columns; never negative. */
    int indentSize = 0;

    /** Increases the indentation by one level (three columns). */
    protected void indent() {
        indentSize += 3;
    }

    /**
     * Decreases the indentation by one level, clamping at zero so that an
     * unmatched end tag cannot produce a negative indent.
     */
    protected void unIndent() {
        indentSize -= 3;
        if (indentSize < 0) {
            indentSize = 0;
        }
    }

    /** Prints {@code indentSize} spaces without a trailing newline. */
    protected void pIndent() {
        for (int i = 0; i < indentSize; i++) {
            System.out.print(" ");
        }
    }

    /**
     * Prints a start tag with its attribute count, then indents what follows.
     *
     * @param t the tag that was opened
     * @param a the attributes of the tag
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag start(<" + t.toString() + ">, " +
            a.getAttributeCount() + " attrs)");
        indent();
    }

    /**
     * Un-indents and prints the end tag.
     *
     * @param t the tag that was closed
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        unIndent();
        pIndent();
        System.out.println("Tag end(</" + t.toString() + ">)");
    }

    /**
     * Prints a tag that has no separate end tag, with its attribute count.
     * The indentation is left unchanged.
     *
     * @param t the tag encountered
     * @param a the attributes of the tag
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag(<" + t.toString() + ">, " +
            a.getAttributeCount() + " attrs)");
    }

    /**
     * Prints a parsing error and the position at which it was reported.
     *
     * @param errorMsg the parser's error message
     * @param pos the position reported by the parser
     */
    @Override
    public void handleError(String errorMsg, int pos){
        System.out.println("Parsing error: " + errorMsg + " at " + pos);
    }
}


Run the program:

java HtmlParseDemo http://www.google.com

No comments:

Post a Comment