Lowden

now

看一看NodeVisitor的源码

// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v $
// $Author: derrickoswald $
// $Date: 2005/04/24 17:48:27 $
// $Revision: 1.39 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.visitors;

import org.htmlparser.Remark;
import org.htmlparser.Text;
import org.htmlparser.Tag;

/**
 * The base class for the 'Visitor' pattern.
 * Classes that wish to use <code>visitAllNodesWith()</code> will subclass
 * this class and provide implementations for methods they are interested in
 * processing.<p>
 * The operation of <code>visitAllNodesWith()</code> is to call
 * <code>beginParsing()</code>, then <code>visitXXX()</code> according to the
 * types of nodes encountered in depth-first order and finally
 * <code>finishedParsing()</code>.<p>
 * Typical code to print every tag's name and start position, plus all text nodes:
 * <pre>
 * import org.htmlparser.Parser;
 * import org.htmlparser.Tag;
 * import org.htmlparser.Text;
 * import org.htmlparser.util.ParserException;
 * import org.htmlparser.visitors.NodeVisitor;
 *
 * public class MyVisitor extends NodeVisitor
 * {
 *     public MyVisitor ()
 *     {
 *     }
 *
 *     public void visitTag (Tag tag)
 *     {
 *         System.out.println ("\n" + tag.getTagName () + tag.getStartPosition ());
 *     }
 *
 *     public void visitStringNode (Text string)
 *     {
 *         System.out.println (string);
 *     }
 *
 *     public static void main (String[] args) throws ParserException
 *     {
 *         Parser parser = new Parser ("http://cbc.ca");
 *         NodeVisitor visitor = new MyVisitor ();
 *         parser.visitAllNodesWith (visitor);
 *     }
 * }
 * </pre>
 * If you want to handle more than one tag type with the same visitor
 * you will need to check the tag type in the visitTag method. You can
 * do that by either checking the tag name:
 * <pre>
 *     public void visitTag (Tag tag)
 *     {
 *        if (tag.getTagName ().equals ("BODY"))
 *            ... do something with the BODY tag
 *        else if (tag.getTagName ().equals ("FRAME"))
 *            ... do something with the FRAME tag
 *    }
 * </pre>
 * or you can use <code>instanceof</code> if all the tags you want to handle
 * have a {@link org.htmlparser.PrototypicalNodeFactory#registerTag registered}
 * tag (i.e. they are generated by the NodeFactory):
 * <pre>
 *     public void visitTag (Tag tag)
 *     {
 *        if (tag instanceof BodyTag)
 *        {
 *            BodyTag body = (BodyTag)tag;
 *            ... do something with body
 *        }
 *        else if (tag instanceof FrameTag)
 *        {
 *            FrameTag frame = (FrameTag)tag;
 *            ... do something with frame
 *        }
 *        else // other specific tags and generic TagNode objects
 *        {
 *        }
 *    }
 * </pre>
 */
public abstract class NodeVisitor
{
    private boolean mRecurseChildren;
    private boolean mRecurseSelf;

    /**
     * Creates a node visitor that recurses itself and it's children.
     */
    public NodeVisitor ()
    {
        this (true);
    }
   
    /**
     * Creates a node visitor that recurses itself and it's children
     * only if <code>recurseChildren</code> is <code>true</code>.
     * @param recurseChildren If <code>true</code>, the visitor will
     * visit children, otherwise only the top level nodes are recursed.
     */
    public NodeVisitor (boolean recurseChildren)
    {
        this (recurseChildren, true);
    }
   
    /**
     * Creates a node visitor that recurses itself only if
     * <code>recurseSelf</code> is <code>true</code> and it's children
     * only if <code>recurseChildren</code> is <code>true</code>.
     * @param recurseChildren If <code>true</code>, the visitor will
     * visit children, otherwise only the top level nodes are recursed.
     * @param recurseSelf If <code>true</code>, the visitor will
     * visit the top level node.
     */
    public NodeVisitor (boolean recurseChildren, boolean recurseSelf)
    {
        mRecurseChildren = recurseChildren;
        mRecurseSelf = recurseSelf;
    }

    /**
     * Override this method if you wish to do special
     * processing prior to the start of parsing.
     */
    public void beginParsing ()
    {
    }

    /**
     * Called for each <code>Tag</code> visited.
     * @param tag The tag being visited.
     */
    public void visitTag (Tag tag)
    {
    }
   
    /**
     * Called for each <code>Tag</code> visited that is an end tag.
     * @param tag The end tag being visited.
     */
    public void visitEndTag (Tag tag)
    {
    }
   
    /**
     * Called for each <code>StringNode</code> visited.
     * @param string The string node being visited.
     */
    public void visitStringNode (Text string)
    {
    }
   
    /**
     * Called for each <code>RemarkNode</code> visited.
     * @param remark The remark node being visited.
     */
    public void visitRemarkNode (Remark remark)
    {
    }

    /**
     * Override this method if you wish to do special
     * processing upon completion of parsing.
     */
    public void finishedParsing ()
    {
    }

    /**
     * Depth traversal predicate.
     * @return <code>true</code> if children are to be visited.
     */
    public boolean shouldRecurseChildren ()
    {
        return (mRecurseChildren);
    }
   
    /**
     * Self traversal predicate.
     * @return <code>true</code> if a node itself is to be visited.
     */
    public boolean shouldRecurseSelf ()
    {
        return (mRecurseSelf);
    }
}

posted on 2009-06-09 13:05 Lowden 阅读(280) 评论(1)  编辑  收藏 所属分类: Java - HtmlParser

Feedback

# re: 看一看NodeVisitor的源码 2009-06-20 01:59 .ju

今天发现,NodeVisitor的代码这么重要  回复  更多评论   


My Links

Blog Stats

常用链接

留言簿

随笔分类

随笔档案

文章分类

文章档案

好友链接

搜索

最新评论

阅读排行榜

评论排行榜