Code
<?php
/**
 * Extract simple anchor tags from an HTML document using a regular expression.
 *
 * The pattern is deliberately narrow. It only matches anchors of the form
 * <a href="...">text</a> (or with single quotes) where:
 *   - href is the first attribute after "<a",
 *   - the href value is quoted,
 *   - no other attribute follows the href value,
 *   - the link text and closing </a> contain no newline ("." does not match
 *     newlines because the "s" modifier is not used).
 * Matching is case-insensitive.
 *
 * @param string $document HTML text to scan.
 * @return array Result of preg_match_all() in its default pattern order:
 *               [0] the full matched anchors,
 *               [1] the href values INCLUDING their surrounding quotes,
 *               [2] the link text (leading whitespace skipped).
 */
function parse_links($document) {
    # Zero or more whitespace characters
    $S0 = '\s*';
    # One or more whitespace characters
    $S1 = '\s+';
    # Anchor tag start
    $anch1 = '<a' . $S1;
    # href= pattern
    $href1 = 'href' . $S0 . '=' . $S0;
    # Quoted strings, captured together with their quotes.
    # Both alternatives need the "*" quantifier so that values of any length
    # (including empty) are accepted inside either quote style.
    $q1 = "'[^']*'";
    $q2 = '"[^"]*"';
    $q = "($q1|$q2)";
    # Full link pattern: lazy (.*?) stops at the nearest closing </a>
    $link_RE = "$anch1$href1$q$S0>\s*(.*?)</a>";
    # "#" is used as the delimiter so "/" in "</a>" needs no escaping
    preg_match_all("#$link_RE#i", $document, $matches);
    return $matches; // returns an array
} // end function parse_links()
//
// DEMO OF HOW TO USE THE FUNCTION
// Grab a webpage. file_get_contents() returns false when the fetch fails
// (e.g. network error, or URL wrappers disabled via allow_url_fopen), so the
// result is checked before it is handed to the parser.
$str = file_get_contents('http://del.icio.us');
if ($str === false) {
    echo "Could not retrieve the page.<br>";
} else {
    // call the parse_links function
    $linkarray = parse_links($str);
    // Loop through the matches, outputting the link text followed by the
    // quoted URL. Index 0 holds the full matches and has one entry per link.
    // NOTE(review): the captured text comes from a remote page and is echoed
    // unescaped; escape it before rendering this anywhere untrusted content matters.
    foreach (array_keys($linkarray[0]) as $i) {
        echo ($linkarray[2][$i] . $linkarray[1][$i] . "<br>");
    }
}
?>
