package util;
import java.io.*;
import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
/**
 * Extracts selected resource references from an HTML file and prints them to
 * standard output.
 *
 * <p>The following tag/attribute pairs are reported:
 * <ul>
 *   <li>{@code LINK href}</li>
 *   <li>{@code A href}</li>
 *   <li>{@code IMG src}</li>
 *   <li>{@code SCRIPT src}</li>
 * </ul>
 */
public class ParseHtml {

    /** File parsed when no path is supplied on the command line. */
    private static final String DEFAULT_FILE = "d:\\y\\scounix.htm";

    /**
     * Reads an HTML file and prints every reference found in it.
     *
     * <p>Any exception raised while reading or parsing is printed as a stack
     * trace rather than propagated.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             file to parse, otherwise {@code DEFAULT_FILE} is used
     */
    public static void main(String args[]) {
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        try {
            startParse(readFile(filename));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the complete content of a text file.
     *
     * <p>The whole file is read in a loop, so the result is neither truncated
     * at a fixed buffer size nor padded with unused buffer characters.
     *
     * @param filename path of the file to read
     * @return the full text of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(String filename) throws IOException {
        StringBuilder text = new StringBuilder();
        // FileReader(String) decodes using the default charset, exactly as the
        // previous implementation did. try-with-resources closes the reader
        // even when a read fails.
        try (Reader in = new BufferedReader(new FileReader(filename))) {
            char[] chunk = new char[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                text.append(chunk, 0, read);
            }
        }
        return text.toString();
    }

    /**
     * Runs the Swing HTML parser over the given markup, reporting matching
     * tags through a fresh {@link Callback}.
     *
     * <p>Any exception raised by the parser is printed as a stack trace.
     *
     * @param sHtml HTML markup to scan
     */
    private static void startParse(String sHtml) {
        try {
            ParserDelegator parser = new ParserDelegator();
            HTMLEditorKit.ParserCallback callback = new Callback();
            // Third argument (ignoreCharSet) is true, as in the original call.
            parser.parse(new StringReader(sHtml), callback, true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parser callback that prints one numbered line per IMG, LINK, A and
     * SCRIPT tag it is notified about. Numbering starts at 1 and is shared by
     * all four tag kinds for the lifetime of the instance.
     */
    static class Callback extends HTMLEditorKit.ParserCallback {

        /** Sequence number of the next line to print. */
        private int count = 1;

        /**
         * Reports the {@code src} of IMG tags and the {@code href} of LINK tags.
         *
         * @param t   tag being reported by the parser
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            if (t.equals(HTML.Tag.IMG)) {
                report("IMG src", a.getAttribute(HTML.Attribute.SRC));
            }
            if (t.equals(HTML.Tag.LINK)) {
                // The value printed is the href attribute, so label it as such.
                report("LINK href", a.getAttribute(HTML.Attribute.HREF));
            }
        }

        /**
         * Reports the {@code href} of A tags and the {@code src} of SCRIPT tags.
         *
         * @param t   tag being reported by the parser
         * @param a   attributes of the tag
         * @param pos position reported by the parser (unused)
         */
        @Override
        public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            if (t.equals(HTML.Tag.A)) {
                report("A href", a.getAttribute(HTML.Attribute.HREF));
            }
            if (t.equals(HTML.Tag.SCRIPT)) {
                report("SCRIPT src", a.getAttribute(HTML.Attribute.SRC));
            }
        }

        /**
         * Prints one numbered result line and advances the counter.
         *
         * @param label text identifying the tag and attribute, e.g. {@code "IMG src"}
         * @param value attribute value; printed as {@code null} when the
         *              attribute is absent
         */
        private void report(String label, Object value) {
            // Concatenation avoids casting the attribute value to String while
            // printing the same text for string values and for null.
            System.out.println("No." + count + " " + label + "=" + value);
            count++;
        }
    }
}
說明:其中的static class可以改為class
本站僅提供存儲服務,所有內容均由用戶發布,如發現有害或侵權內容,請
點擊舉報。