XPath vs SimpleTag vs 正規表現? 全部できればいいんじゃないかな?

作成中
http://diggin.googlecode.com/svn/trunk/library/Diggin/Scraper/
http://code.google.com/p/diggin/source/browse/trunk/library/Diggin/Scraper/

<?php
require_once 'Diggin/Scraper/Client.php';
require_once 'Zend/Http/Client.php';
require_once 'Zend/Http/Client/Adapter/Test.php';

// Test adapter for Zend_Http_Client; the response it should hand back is
// registered further down via setResponse().
$adapter = new Zend_Http_Client_Adapter_Test();

// Fixture document used as the response body: an XHTML page with two RSS
// <link> tags, one <a> tag and three <img> tags. The text after </head> and
// the closing </img> are part of the fixture exactly as given.
$source = <<<EOF
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>titleTest</title>
 <link rel="alternate" type="application/rss+xml" title="hogehoge(RSS)" href="../rss" />
 <link rel="alternate" type="application/rss+xml" title="hogehoge(RSS)" href="rss.xml" />  
</head>/rss
<body>
<a href="./hogehoge">test</a>
<img id="id" src="/images/button.gif" alt="gazou" />tes</img>
<img id="id" src="../images/button.gif" alt="gazou" />
<img id="id" src="button.gif" alt="gazou" />
</body>
</html>
EOF;

// Raw HTTP response: status line, one header, the empty line that separates
// headers from the body, then the fixture itself, all joined with CRLF.
$rawResponse = implode("\r\n", array(
    "HTTP/1.1 200 OK",
    "Content-type: text/xml",
    "",
    $source,
));
$adapter->setResponse($rawResponse);

// HTTP client wired to the test adapter and pointed at the sample URL.
$url = "http://www.example.org/hoge/do.php/test?test#test";
$clientConfig = array('adapter' => $adapter);
$httpClient = new Zend_Http_Client($url, $clientConfig);

// Scraper under demonstration; it performs its requests through $httpClient.
$scraper = new Diggin_Scraper_Client();
$scraper->setHttpClient($httpClient);

require_once 'Diggin/Scraper/Strategy/Xpath/Adapter/Tidy.php';
require_once 'Diggin/Scraper/Strategy/Xpath/Adapter/Loadhtml.php';
require_once 'Diggin/Scraper/Strategy/Xpath/Adapter/Htmlparser.php';

// Both XPath adapters are created up front, before any strategy is selected.
$tidy = new Diggin_Scraper_Strategy_Xpath_Adapter_Tidy();
$loadhtml = new Diggin_Scraper_Strategy_Xpath_Adapter_Loadhtml();

// XPath strategy: run the same "//a" query once per adapter (Tidy first,
// then Loadhtml) and dump each result.
foreach (array($tidy, $loadhtml) as $xpathAdapter) {
    $scraper->setStrategy("Diggin_Scraper_Strategy_Xpath", $xpathAdapter);
    $xpathResult = $scraper->scrape("//a");
    print_r($xpathResult);
}

// Fetch with the SimpleTag strategy: the expression is a bare tag name.
$scraper->setStrategy("Diggin_Scraper_Strategy_SimpleTag");
$simpleTagResult = $scraper->scrape("a");
print_r($simpleTagResult);

// Fetch with the Regex strategy (preg_match_all, for now): the pattern
// matches closing tags such as </a>.
$scraper->setStrategy("Diggin_Scraper_Strategy_Regex");
$regexResult = $scraper->scrape("/<\/\w+>/");
print_r($regexResult);



結果

Array
(
    [0] => SimpleXMLElement Object
        (
            [@attributes] => Array
                (
                    [href] => ./hogehoge
                )

            [0] => test
        )

)
Array
(
    [0] => SimpleXMLElement Object
        (
            [@attributes] => Array
                (
                    [href] => ./hogehoge
                )

            [0] => test
        )

)
Array
(
    [0] => SimpleTag Object
        (
            [name] => a
            [value] => test
            [plain] => <a href="./hogehoge">test</a>
            [parameterList] => Array
                (
                    [href] => SimpleTagParameter Object
                        (
                            [id] => href
                            [value] => ./hogehoge
                            [name] => href
                        )

                )

            [attributeList] => Array
                (
                )

            [pos] => 240
            [start] => <a href="./hogehoge">
            [end] => </a>
            [normalization] => 
        )

)
Array
(
    [0] => Array
        (
            [0] => </title>
            [1] => </head>
            [2] => </a>
            [3] => </img>
            [4] => </body>
            [5] => </html>
        )

)