Languages :: PHP :: Knowledge Base : How to tell whether the URIs in a set are good or bad links |
|||
| By: VGR |
Date: 21/03/2005 08:51:00 |
Points: 0 | Status: Answered Quality : Good |
|
Dated 18/03/2003 <?php
//
// VGR18032003 EE
//
// Demo script: probe every URI in $links and report the ones that cannot be
// fetched or whose content carries a "[404] Not Found" marker.

// Links to check, keyed by an arbitrary index (filled in beforehand by you).
$links = array();
$links[1] = 'http://www.netscape.com';
$links[2] = 'http://www.badlink.zob';
$links[3] = 'http://www.europeanexperts.org';

// test: set to false to silence the progress output
$DEBUGTEST = true;
if ($DEBUGTEST) {
    echo count($links)." links in input ";
}

// inits
$badlinks = 0;
$bad = array(); // keys of $links whose URI failed the check

// Walk the array by its real keys: the previous "for 1..count()" loop only
// worked when the keys were exactly 1..n with no gaps.
foreach ($links as $i => $link) {
    // try to access that link
    $isgood = CheckURI($link);
    if ($DEBUGTEST) {
        echo "link $i '".$link."' is ".(($isgood)?'OK':'KO')." ";
    }
    // memorize result
    if (!$isgood) {
        $bad[] = $i;
    }
}

// display bad links
$badlinks = count($bad);
if ($DEBUGTEST) {
    echo "$badlinks bad links found ";
}

// VGR04022005: foreach form of the report loop. $bad stores KEYS of $links,
// so the URL has to be looked up through $links; "index" is the position in
// the list of bad links, as in the original output.
foreach ($bad as $i => $linkKey) {
    echo "bad link '".$links[$linkKey]."' (index=$i) ";
}
// done

/**
 * Tell whether a URI can be opened and does not look like a "404" page.
 *
 * The URI is opened with fopen() (any registered stream wrapper works, so a
 * local path is accepted as well as http://...). The content is then scanned
 * for the literal marker "[404] Not Found"; scanning stops at the first hit.
 * With PHP's default HTTP stream options (ignore_errors off) a genuine 4xx/5xx
 * answer already makes fopen() fail, so the marker scan mainly catches pages
 * that are served successfully but only contain an error message.
 *
 * A read failure in the middle of the stream ends the scan; the link is then
 * reported as good because it could be opened and no marker was seen.
 *
 * @param string $parurl URI (or path) to probe.
 *
 * @return bool TRUE when the URI opened and no marker was found, FALSE when it
 *              could not be opened or the marker was encountered.
 */
function CheckURI($parurl)
{
    $marker = '[404] Not Found';
    // Number of bytes to carry over between reads so that a marker split
    // across two fgets() chunks is still detected.
    $overlap = strlen($marker) - 1;

    // '@' is deliberate: an unreachable link is the expected "bad" outcome and
    // is reported through the return value, not through a PHP warning.
    $fd = @fopen("$parurl", 'r');
    if ($fd === false) {
        // page not found
        return false;
    }

    $result = true;
    $tail = '';
    while (!feof($fd)) {
        $ligne = fgets($fd, 4096);
        if ($ligne === false) {
            // EOF reached on this read, or a read error/timeout: stop here
            // instead of spinning on a stream whose feof() never turns true.
            break;
        }

        $window = $tail.$ligne;
        if (strpos($window, $marker) !== false) {
            // stop as soon as the marker is encountered
            $result = false;
            break;
        }

        // Keep only the last few bytes; $tail alone is shorter than the
        // marker, so a later match always involves freshly read data.
        $tail = (string) substr($window, -$overlap);
    }
    fclose($fd);

    return $result;
} // CheckURI Boolean Function
?> |
|||
|
Do register to be able to answer |
|||
©2010 These pages are served without commercial sponsorship. (No popup ads, etc...). Bandwidth abuse increases hosting cost forcing sponsorship or shutdown. This server aggressively defends against automated copying for any reason including offline viewing, duplication, etc... Please respect this requirement and DO NOT RIP THIS SITE.
Please DO link to this page!








