+ Reply to Thread
Page 1 of 2 12 LastLast
Results 1 to 10 of 12

Thread: Ajax link checker

  1. #1
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Ajax link checker

    I found this cool jQuery plugin while casually browsing the plugins on the jQuery site. It might be very useful for web admins. Here's the code:
    PHP Code:
    <?php
    /**
     * jQuery Link Checker 
     * 
     * http://troy.dyle.net
     * Created for: http://russianwebstudio.com
     * Copyright (c) 2007 Anton Sidashin
     * troy [at] simix.ru  
     *
     */


    error_reporting(E_ALL);

    // this function taken from Drupal ( drupal.org )
    /**
     * Converts a PHP value into a JavaScript literal.
     *
     * Booleans become the strings true/false, integers and doubles are returned
     * unchanged, strings (and resources) are quoted, empty or 0..n-1 indexed
     * arrays become "[ ... ]", other arrays and objects become "{ ... }", and
     * every other type (e.g. NULL) becomes the string null.
     *
     * NOTE(review): the string branch emits \xNN and (via addslashes) \' escapes,
     * which are JavaScript escapes rather than strict JSON; a strict JSON parser
     * may reject such strings.
     *
     * @param mixed $var Value to serialise.
     * @return mixed The literal as a string, or the number itself for
     *               integers and doubles.
     */
    function json($var) {
      switch (gettype($var)) {
        case 'boolean':
          return $var ? 'true' : 'false'; // Lowercase necessary!
        case 'integer':
        case 'double':
          return $var;
        case 'resource':
        case 'string':
          // Quotes/backslashes are escaped first, then CR, LF, <, > and & are
          // swapped for literal backslash sequences.
          return '"'. str_replace(array("\r", "\n", "<", ">", "&"),
                                  array('\r', '\n', '\x3c', '\x3e', '\x26'),
                                  addslashes($var)) .'"';
        case 'array':
          // An empty array, or one whose keys are exactly 0..n-1, is emitted
          // as a JavaScript array.
          if (empty ($var) || array_keys($var) === range(0, sizeof($var) - 1)) {
            $output = array();
            foreach ($var as $v) {
              $output[] = json($v);
            }
            return '[ '. implode(', ', $output) .' ]';
          }
          // Otherwise, fall through to convert the array as an object.
        case 'object':
          $output = array();
          foreach ($var as $k => $v) {
            $output[] = json(strval($k)) .': '. json($v);
          }
          return '{ '. implode(', ', $output) .' }';
        default:
          return 'null';
      }
    }

    /**
     * Returns the distinct values of $urls in first-seen order, re-indexed from 0.
     *
     * Every element is visited regardless of its key, so a gap in the indexes
     * no longer ends the scan early. Values are compared strictly so that
     * numeric-looking strings such as "10" and "1e1" are not merged.
     *
     * @param mixed $urls List of URLs; anything that is not an array yields an
     *                    empty list.
     * @return array Unique values.
     */
    function unique_urls($urls) {
      if (!is_array($urls)) {
        return array();
      }

      $uurls = array();
      foreach ($urls as $url) {
        if (!in_array($url, $uurls, true)) {
          $uurls[] = $url;
        }
      }
      return $uurls;
    }


    /**
     * Sends an HTTP/1.0 HEAD request for $url over a raw socket and returns
     * the raw response.
     *
     * The socket is always opened to port 80 of the URL's host; the URL's
     * scheme and port are not consulted.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds handed to fsockopen().
     * @return string|false Everything read from the socket until EOF, or false
     *                      when the URL has no host or the socket cannot be opened.
     */
    function get_head($url, $timeout = 3) {
      $info = @parse_url($url);

      // parse_url() gives false or an array without "host" for malformed or
      // relative URLs; there is nothing to connect to then.
      if (empty($info["host"])) {
        return false;
      }

      $fp = @fsockopen($info["host"], 80, $errno, $errstr, $timeout);
      if (!$fp) {
         return false;
      }

      // An empty path is requested as the site root.
      if (empty($info["path"])) {
        $info["path"] = "/";
      }

      $query = "";
      if (isset($info["query"])) {
        $query = "?".$info["query"]."";
      }

      $out  = "HEAD ".$info["path"]."".$query." HTTP/1.0\r\n";
      $out .= "Host: ".$info["host"]."\r\n";
      $out .= "Connection: close \r\n" ;
      $out .= "User-Agent: jQuery_LinkChecker/1.1\r\n\r\n";

      // write the headers out
      fwrite($fp, $out);

      $html = '';
      while (!feof($fp)) {
        $html .= fread($fp,8192);
      }

      // Closes socket
      fclose($fp);

      return $html;
    }

    // Get status code
    /**
     * Extracts the status text from the first line of a raw HTTP response.
     *
     * @param string $header Raw response as returned by get_head().
     * @return string The first line with its "HTTP/x.y" token removed, so the
     *                leading space is kept (e.g. " 200 OK"), or
     *                'Unknown status' when the first line does not look like
     *                an HTTP status line.
     */
    function get_status($header) {
      $headers = explode("\r\n", $header);
      unset($header);

      if (preg_match("/HTTP\/[0-9A-Za-z +]/i", $headers[0])) {
        $status = preg_replace("/http\/[0-9]\.[0-9]/i", "", $headers[0]);
        return $status;
      } else {
        return 'Unknown status';
      }
    }

    /**
     * Reports whether a HEAD request for $url is answered with "200 OK".
     *
     * Only a status line containing the text '200 OK' counts as existing;
     * every other response, including 3xx redirects, yields false.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds, forwarded to get_head().
     * @return bool True when the status contains '200 OK', false otherwise or
     *              when no response was received.
     */
    function url_exists($url, $timeout = 3) {
      $html = get_head($url, $timeout);
      if(empty($html)) {
        return false;
      }

      $status = get_status($html);

      if(strpos($status, '200 OK') !== FALSE) {
        return true;
      }

      return false;
    }

     

    // Request handler: checks every distinct URL passed as links[] and echoes
    // a list of { href, status } records for the client script.
    if(isset($_GET['links'])) {
      $links = unique_urls($_GET['links']);

      // Use the same default as get_head() when the client sends no timeout.
      $timeout = isset($_GET['timeout']) ? (int) $_GET['timeout'] : 3;

      $result = Array();
      foreach ($links as $l) {
        // The echoed href has trailing slashes/backslashes stripped, so it can
        // differ from the href the client submitted.
        $result[] = array('href'=>rtrim($l, '/\\') , 'status'=>url_exists($l, $timeout) ? 'active' : 'inactive');
      }

      echo json($result);
    }

    ?>
    JS:
    Code:
    /**
     * @author Anton Sidashin ( troy at simix dot ru )
     */
     
    
    // Sends the href of every matched link to settings.checkScript in batches
    // of settings.linksAtOnce and adds settings.activeClass or
    // settings.inactiveClass to the links according to the reported status.
    // Returns undefined (also when the set is empty), so it cannot be chained.
    jQuery.fn.linkChecker = function(settings) {
        if(!this.length) return;
        settings = jQuery.extend({
            linksAtOnce: 2,
            checkScript: 'checklinks.php',
            activeClass: 'active',
            inactiveClass: 'inactive',
            timeout: 3
        }, settings);

        // A zero, negative or non-numeric batch size would never drain `urls`
        // and loop forever, so at least one link is taken per request.
        var batchSize = Math.max(1, parseInt(settings.linksAtOnce, 10) || 1);

        var urls = [];
        this.each(function() {
            urls.push(this.href);
        });

        while(urls.length) {
            // splice() removes and returns up to batchSize URLs from the front.
            var linkSlice = urls.splice(0, batchSize);
            checkLinks(linkSlice, settings, this);
        }

        // Requests the status of one batch and marks the matching links.
        function checkLinks(urls, settings, jLinks) {
            jQuery.getJSON(settings.checkScript, {'links[]':urls, 'timeout':settings.timeout}, function(links){
                for(var i = 0; i<links.length; i++) {
                    // NOTE: "^=" is a prefix match, so every link whose href merely
                    // starts with the returned href receives the class as well.
                    jLinks.filter('[href^='+ links[i].href + ']').addClass(links[i].status == 'active' ? settings.activeClass : settings.inactiveClass);
                }
            });
        }
    };
    Finally, html:
    HTML Code:
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
        <head>
            <title>RussianWebStudio.com</title>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
            <link href="style.css" rel="stylesheet" type="text/css" />        
    <script type="text/javascript">
    // Creates an XMLHttpRequest, falling back to the two ActiveX variants;
    // returns false when none of them can be constructed.
    function getXMLHTTPRequest() {
        var req;
        try {
            req = new XMLHttpRequest();
        } catch (err1) {
            try {
                req = new ActiveXObject("Msxml2.XMLHTTP");
            } catch (err2) {
                try {
                    req = new ActiveXObject("Microsoft.XMLHTTP");
                } catch (err3) {
                    req = false;
                }
            }
        }
        return req;
    }

    var http = getXMLHTTPRequest();

    // Sends the href of every <a> on the page, joined with "|", to
    // linkchecker.php as the "list" query parameter.
    function checklinks() {
        var anchors = document.getElementsByTagName('a');
        var hrefs = [];
        for (var i = 0; i < anchors.length; i++) { // iterate through the links
            hrefs.push(anchors[i].href);
        }
        // build the link list; join() puts no divider after the last one and
        // copes with a page that has no links at all
        var txt = hrefs.join('|');

        var myurl = 'linkchecker.php?list=';
        var myRand = Math.floor(Math.random() * 999999999999999);
        // Encode the list so "&", "#" and similar characters inside an href
        // cannot cut the query string short.
        var modurl = myurl + encodeURIComponent(txt) + "&rand=" + myRand;
        http.open("GET", modurl, true);
        http.onreadystatechange = useHttpResponse;
        http.send(null);
    }

    // Alerts when the finished request did not come back with HTTP status 200.
    function useHttpResponse() {
        if (http.readyState == 4) {
            if (http.status != 200) {
                alert('Link Check Problem');
            }
        }
    }
    </script>
        
        <style>
            
            .inactive {
                color: #33ccff;
                background: #ccffff;
            }
            .active {
                color: #ffcc66;
                background: #ffffcc;
            }
            </style>
            
            </head>
        <body>
    <h1>jQuery Ajax Link Checker Demo</h1>
    <p>Just view source of that file to see example usage. Blue means active link, red means inactive</p>
    
    <li><a href="http://www.microsoft.com/en/us/default.aspx" class="goto">microsoft.com</a></li>
    <li><a href="http://37signaghls.com/" class="goto">37signals.com</a> (99.9% online, I think :)</li>
    <li><a href="http://unexistenthost.fm/" class="goto">unexistenthost.fm</a> (will be 100% offline)</li>
    <li><a href="http://russianwebstudio.com/" class="goto">russianwebstudio.com</a></li>
    <li><a href="http://sidashin.ru/" class="goto">sidashin.ru</a></li>
    
    
    </body>
    </html>
    But there's a slight problem, the link checker does not work correctly with 404 pages. Is there anyway to do that?

  2. #2
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Re: Ajax link checker

    Can someone at least tell me if it's possible?

  3. #3
    misson is offline x10 Spammer misson is a jewel in the rough
    Join Date
    Mar 2008
    Location
    Libertatia
    Posts
    2,506

    Re: Ajax link checker

    Quote Originally Posted by Teensweb View Post
    But there's a slight problem, the link checker does not work correctly with 404 pages. Is there anyway to do that?
    What's correct behavior? It looks like the script reports a 404 as a failure, which seems correct to me. What doesn't seem correct is it treats redirects (3xx status codes) as failures. Should you wish to change the behavior, look to url_exists, which is the function that checks the status.

    The code could use some other alterations. Function json and unique_urls replicate PHP's own json_encode and array_unique, respectively. json_encode is not available in PHP4, true, but PHP4 is getting rarer and rarer. To handle PHP4, define a json_encode if and only if one doesn't already exist.

    Even better would be to drop the PHP script and have the JS script do all the work. There's no reason to have the server perform the extra processing & requests. The client is already making extra requests to the server, so you might as well have the client request the pages directly.

  4. #4
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Re: Ajax link checker

    Thanx for the help mission. But the actual problem is that I've a custom 404 page. So it doesn't detect that. Is there some work-around for that?. And also, this is just intended to be helpfull to me, as I am the admin of my site. And it doesn't affect the server load as I would do it only once or twice a week on all pages to ensure that no link's broken. Of course, if the javascript alone can do it, it is also fine. But i don't know how to do that. So is there a simple change available that can detect custom 404 pages also?

  5. #5
    misson is offline x10 Spammer misson is a jewel in the rough
    Join Date
    Mar 2008
    Location
    Libertatia
    Posts
    2,506

    Re: Ajax link checker

    Quote Originally Posted by Teensweb View Post
    Thanx for the help mission. But the actual problem is that I've a custom 404 page. So it doesn't detect that. Is there some work-around for that?.
    If properly configured, custom error pages shouldn't affect the script, for it never retrieves the page itself, only the HTTP headers. Given the limited information you provided, there's no way of knowing what's going wrong. How did you set up custom error pages? How are they breaking the link checker? Does the 404 page not return a 404 status? If not, you need to fix it so a 404 page results in a 404 status.

    Quote Originally Posted by Teensweb View Post
    And also, this is just intended to be helpfull to me, as I am the admin of my site. And it doesn't affect the server load as I would do it only once or twice a week on all pages to ensure that no link's broken. Of course, if the javascript alone can do it, it is also fine. But i don't know how to do that.
    To filter out duplicate URLs, store the URLs as properties of an object, thus treating the object as an associative array. You can specify the "HEAD" method when calling XMLHttpRequest::open(). XMLHttpRequest::status contains the HTTP response status. The rest of the PHP script is either unnecessary or just framework for those three tasks.

  6. #6
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Re: Ajax link checker

    "If properly configured, custom error pages shouldn't affect the script, for it never retrieves the page itself, only the HTTP headers. Given the limited information you provided, there's no way of knowing what's going wrong. How did you set up custom error pages? How are they breaking the link checker? Does the 404 page not return a 404 status? If not, you need to fix it so a 404 page results in a 404 status"
    I did it with the .htaccess file. I don't know what you mean by "Does the 404 page not return a 404 status"(m quite new to php) But one thing's for sure, if you goto any non exisiting pages like http://www.teensweb.co.cc/awdasda , it shows the 404 page but that link checker says that url is valid. Can you help me with that?
    Last edited by Teensweb; 04-28-2009 at 01:09 AM.

  7. #7
    misson is offline x10 Spammer misson is a jewel in the rough
    Join Date
    Mar 2008
    Location
    Libertatia
    Posts
    2,506

    Re: Ajax link checker

    Quote Originally Posted by Teensweb View Post
    I don't know what you mean by "Does the 404 page not return a 404 status"(m quite new to php)
    I mean does the HTTP response status line have a 404 status code or not (I checked; it does). In that case, the custom error page is not causing problems for the script, which doesn't even fetch the page itself. When describing a problem, stick to the symptoms and don't guess at what you think is the cause.

    Take a closer look at your HTML page. It doesn't use the jQuery based script, nor does it call the PHP script properly, nor does it use the output of the PHP script properly. It uses a different name for the PHP script. All together, it doesn't look like it's supposed to be used by the other two. However, useHttpResponse() hints at how you can check links using only JS.

    What is the benefit of these scripts? What do you want to use them for?

  8. #8
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Re: Ajax link checker

    Thank you mission, but what should I edit to fix the problem with that script?
    Edit:
    I have uploaded the script here
    you see the link www.notexisting.com is marked red, which means it detects server not found errors. But the link www.teensweb.co.cc/notexisting returns a 404 error but it's marked in blue. That's the prob.
    Last edited by Teensweb; 04-28-2009 at 11:54 PM. Reason: Automerged Doublepost

  9. #9
    misson is offline x10 Spammer misson is a jewel in the rough
    Join Date
    Mar 2008
    Location
    Libertatia
    Posts
    2,506

    Re: Ajax link checker

    checklinks.php returns "inactive" for the nonexistent page, so the problem's not there. (It actually returns "inactive" for all but the first page, but we'll get back to that later.) Examine the links in a DOM inspector and you'll discover that all but the first teensweb links have both an active and an inactive class; evidently, the problem is that somewhere the wrong <a> elements are getting marked as active. The only place this happens is in jquery.linkchecker.js, in the line:
    Code:
    jLinks.filter('[href^='+ links[i].href + ']').addClass(links[i].status == 'active' ? settings.activeClass : settings.inactiveClass);
    Combine that with the fact that checklinks.php returns "inactive" for all but http://www.teensweb.co.cc/ and we can see the problem. The filter when processing http://www.teensweb.co.cc/ is '[href^=http://www.teensweb.co.cc/]', which looks for all href attributes that begin with a given url ("^="). Change "href^=" to "href=" to look for exact matches only.

    Once you make that change, you'll notice that all but the first link (the one to http://www.teensweb.co.cc/) will be marked as inactive. This goes back to my earlier remark about checklinks.php only looking for a 200 status when it should return "active" for any 2XX or 3XX status. You can fix the original script or use this altered version:
    PHP Code:
    <?php
    // Declare the response media type; header() takes a complete
    // "Name: value" field line, not just the value.
    header("Content-Type: text/plain");

    /**
     * jQuery Link Checker 
     * 
     * http://troy.dyle.net
     * Created for: http://russianwebstudio.com
     * Copyright (c) 2007 Anton Sidashin
     * troy [at] simix.ru  
     *
     */


    error_reporting(E_ALL);

    /**
     * Sends an HTTP/1.0 HEAD request for $url over a raw socket and returns
     * the raw response.
     *
     * The socket is always opened to port 80 of the URL's host; the URL's
     * scheme and port are not consulted.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds handed to fsockopen().
     * @return string|false Everything read from the socket until EOF, or false
     *                      when the URL has no host or the socket cannot be opened.
     */
    function get_head($url, $timeout = 3) {
      $info = @parse_url($url);

      // parse_url() gives false or an array without "host" for malformed or
      // relative URLs; there is nothing to connect to then.
      if (empty($info["host"])) {
        return false;
      }

      $fp = @fsockopen($info["host"], 80, $errno, $errstr, $timeout);
      if (!$fp) {
         return false;
      }

      // An empty path is requested as the site root.
      if (empty($info["path"])) {
        $info["path"] = "/";
      }

      $query = "";
      if (isset($info["query"])) {
        $query = "?".$info["query"]."";
      }

      $out  = "HEAD ".$info["path"]."".$query." HTTP/1.0\r\n";
      $out .= "Host: ".$info["host"]."\r\n";
      $out .= "Connection: close \r\n" ;
      $out .= "User-Agent: jQuery_LinkChecker/1.1\r\n\r\n";

      // write the headers out
      fwrite($fp, $out);

      $html = '';
      while (!feof($fp)) {
        $html .= fread($fp,8192);
      }
      fclose($fp);

      return $html;
    }

    /**
     * Splits an HTTP status line into its three space-separated parts.
     *
     * @param string $statusLine First line of an HTTP response,
     *                           e.g. "HTTP/1.1 404 Not Found".
     * @return array Keys 'version', 'statusCode' and 'statusPhrase' when the
     *               line has three parts; otherwise a placeholder with
     *               statusCode '999' and no 'version' key.
     */
    function parse_status($statusLine) {
        // Limit of 3 keeps a multi-word reason phrase in one piece.
        $status = explode(' ', $statusLine, 3);
        if (count($status) == 3) {
            return array_combine(array('version', 'statusCode', 'statusPhrase'), $status);
        } else {
            return array('statusCode' => '999', 'statusPhrase' => 'unknown error requesting URL');
        }
    }

    // Get status code
    /**
     * Requests the headers of $url and returns the parsed status line.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds, forwarded to get_head().
     * @return array Result of parse_status() for the first response line.
     */
    function get_status($url, $timeout=3) {
        // Only the first line is needed, so split once at the first CRLF.
        $head = explode("\r\n", get_head($url, $timeout), 2);
        return parse_status($head[0]);
    }

    /**
     * Decides whether a parsed status represents a reachable page.
     *
     * @param array $status Parsed status as produced by parse_status().
     * @return bool True when a 'statusCode' entry is set and compares below 400.
     */
    function page_exists($status) {
      if (!isset($status['statusCode'])) {
        return false;
      }

      return $status['statusCode'] < 400;
    }

    // Request handler: checks every distinct URL passed as links[] and echoes
    // a JSON list of { href, statusCode, status } records.
    if(isset($_GET['links'])) {
        // The cast lets a single scalar "links" value be treated as a
        // one-element list instead of failing inside array_unique().
        $links = array_unique((array) $_GET['links']);
        if (isset($_GET['timeout'])) {
            $timeout = (int) $_GET['timeout'];
        } else {
            $timeout = 3;
        }
        $result = Array();
        foreach ($links as $l) {
            $status = get_status($l, $timeout);
            $result[] = array(
                // Trailing slashes/backslashes are stripped, so the echoed
                // href can differ from the one the client submitted.
                'href'=>rtrim($l, '/\\'),
                'statusCode' => $status['statusCode'],
                'status'=>page_exists($status) ? 'active' : 'inactive'
            );
        }

        echo json_encode($result);
    }
    ?>
    Edit: I keep forgetting to mention, it's "Misson", not "Mission" (only one "i").
    Last edited by misson; 04-29-2009 at 07:35 PM.

  10. #10
    Teensweb is offline x10 Lieutenant Teensweb is an unknown quantity at this point
    Join Date
    May 2008
    Posts
    352

    Re: Ajax link checker

    Sorry misson, i'm very careless (esp abt spellings). But when i edited everything as you said nothing gets highlighted at all! Weird! I'm damn new to php so i can't figure out the problem......
    Did you try it out yourself?

    Edit:
    I just messed up and using my logic got this code
    PHP Code:
    <?php
    /**
     * jQuery Link Checker 
     * 
     * http://troy.dyle.net
     * Created for: http://russianwebstudio.com
     * Copyright (c) 2007 Anton Sidashin
     * troy [at] simix.ru  
     *
     */


    error_reporting(E_ALL);

    // this function taken from Drupal ( drupal.org )
    /**
     * Converts a PHP value into a JavaScript literal.
     *
     * Booleans become the strings true/false, integers and doubles are returned
     * unchanged, strings (and resources) are quoted, empty or 0..n-1 indexed
     * arrays become "[ ... ]", other arrays and objects become "{ ... }", and
     * every other type (e.g. NULL) becomes the string null.
     *
     * NOTE(review): the string branch emits \xNN and (via addslashes) \' escapes,
     * which are JavaScript escapes rather than strict JSON; a strict JSON parser
     * may reject such strings.
     *
     * @param mixed $var Value to serialise.
     * @return mixed The literal as a string, or the number itself for
     *               integers and doubles.
     */
    function json($var) {
      switch (gettype($var)) {
        case 'boolean':
          return $var ? 'true' : 'false'; // Lowercase necessary!
        case 'integer':
        case 'double':
          return $var;
        case 'resource':
        case 'string':
          // Quotes/backslashes are escaped first, then CR, LF, <, > and & are
          // swapped for literal backslash sequences.
          return '"'. str_replace(array("\r", "\n", "<", ">", "&"),
                                  array('\r', '\n', '\x3c', '\x3e', '\x26'),
                                  addslashes($var)) .'"';
        case 'array':
          // An empty array, or one whose keys are exactly 0..n-1, is emitted
          // as a JavaScript array.
          if (empty ($var) || array_keys($var) === range(0, sizeof($var) - 1)) {
            $output = array();
            foreach ($var as $v) {
              $output[] = json($v);
            }
            return '[ '. implode(', ', $output) .' ]';
          }
          // Otherwise, fall through to convert the array as an object.
        case 'object':
          $output = array();
          foreach ($var as $k => $v) {
            $output[] = json(strval($k)) .': '. json($v);
          }
          return '{ '. implode(', ', $output) .' }';
        default:
          return 'null';
      }
    }

    /**
     * Returns the distinct values of $urls in first-seen order, re-indexed from 0.
     *
     * Every element is visited regardless of its key, so a gap in the indexes
     * no longer ends the scan early. Values are compared strictly so that
     * numeric-looking strings such as "10" and "1e1" are not merged.
     *
     * @param mixed $urls List of URLs; anything that is not an array yields an
     *                    empty list.
     * @return array Unique values.
     */
    function unique_urls($urls) {
      if (!is_array($urls)) {
        return array();
      }

      $uurls = array();
      foreach ($urls as $url) {
        if (!in_array($url, $uurls, true)) {
          $uurls[] = $url;
        }
      }
      return $uurls;
    }


    /**
     * Sends an HTTP/1.0 HEAD request for $url over a raw socket and returns
     * the raw response.
     *
     * The socket is always opened to port 80 of the URL's host; the URL's
     * scheme and port are not consulted.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds handed to fsockopen().
     * @return string|false Everything read from the socket until EOF, or false
     *                      when the URL has no host or the socket cannot be opened.
     */
    function get_head($url, $timeout = 3) {
      $info = @parse_url($url);

      // parse_url() gives false or an array without "host" for malformed or
      // relative URLs; there is nothing to connect to then.
      if (empty($info["host"])) {
        return false;
      }

      $fp = @fsockopen($info["host"], 80, $errno, $errstr, $timeout);
      if (!$fp) {
         return false;
      }

      // An empty path is requested as the site root.
      if (empty($info["path"])) {
        $info["path"] = "/";
      }

      $query = "";
      if (isset($info["query"])) {
        $query = "?".$info["query"]."";
      }

      $out  = "HEAD ".$info["path"]."".$query." HTTP/1.0\r\n";
      $out .= "Host: ".$info["host"]."\r\n";
      $out .= "Connection: close \r\n" ;
      $out .= "User-Agent: jQuery_LinkChecker/1.1\r\n\r\n";

      // write the headers out
      fwrite($fp, $out);

      $html = '';
      while (!feof($fp)) {
        $html .= fread($fp,8192);
      }

      // Closes socket
      fclose($fp);

      return $html;
    }

    // Get status code
    /**
     * Extracts the status text from the first line of a raw HTTP response.
     *
     * @param string $header Raw response as returned by get_head().
     * @return string The first line with its "HTTP/x.y" token removed, so the
     *                leading space is kept (e.g. " 200 OK"), or
     *                'Unknown status' when the first line does not look like
     *                an HTTP status line.
     */
    function get_status($header) {
      $headers = explode("\r\n", $header);
      unset($header);

      if (preg_match("/HTTP\/[0-9A-Za-z +]/i", $headers[0])) {
        $status = preg_replace("/http\/[0-9]\.[0-9]/i", "", $headers[0]);
        return $status;
      } else {
        return 'Unknown status';
      }
    }

    /**
     * Reports whether a HEAD request for $url is answered with a status code
     * below 400.
     *
     * @param string $url     URL to probe.
     * @param int    $timeout Timeout in seconds, forwarded to get_head().
     * @return bool True for a numeric status code below 400; false for 4xx/5xx,
     *              for an unrecognised status line, or when no response was
     *              received.
     */
    function url_exists($url, $timeout = 3) {
      $html = get_head($url, $timeout);
      if(empty($html)) {
        return false;
      }

      // get_status() returns text such as " 404 Not Found". Casting extracts
      // the leading number explicitly instead of relying on string-vs-integer
      // comparison, whose rules differ between PHP versions. The text
      // 'Unknown status' casts to 0 and is rejected.
      $code = (int) get_status($html);

      if($code > 0 && $code < 400) {
        return true;
      }

      return false;
    }

     

    // Request handler: checks every distinct URL passed as links[] and echoes
    // a list of { href, status } records for the client script.
    if(isset($_GET['links'])) {
      $links = unique_urls($_GET['links']);

      // Use the same default as get_head() when the client sends no timeout.
      $timeout = isset($_GET['timeout']) ? (int) $_GET['timeout'] : 3;

      $result = Array();
      foreach ($links as $l) {
        // The echoed href has trailing slashes/backslashes stripped, so it can
        // differ from the href the client submitted.
        $result[] = array('href'=>rtrim($l, '/\\') , 'status'=>url_exists($l, $timeout) ? 'active' : 'inactive');
      }

      echo json($result);
    }

    ?>
    I also edited the JS as you suggested, and it now detects 404 pages. But the first link is still marked neither active nor inactive; see http://www.teensweb.co.cc/linkchecker-0.2-dev/
    Edit:
    One more doubt, if notexisting returns status 404, cant the problem just be solved with
    " if($status = "200") {
    return true;
    }
    "
    ?
    I see that it can't, but I'm asking just out of curiosity, you know.
    Last edited by Teensweb; 04-30-2009 at 12:01 PM. Reason: Automerged Doublepost

+ Reply to Thread
Page 1 of 2 12 LastLast

Similar Threads

  1. Link exchange with skepo.info
    By dbojan in forum Link Exchange
    Replies: 15
    Last Post: 11-27-2009, 06:21 PM
  2. Link Game
    By Mitch in forum Earning Money
    Replies: 0
    Last Post: 08-16-2008, 04:07 AM
  3. AJAX response xml not working
    By jspcodes in forum Programming Help
    Replies: 3
    Last Post: 06-05-2008, 08:12 AM
  4. AJAX Gaming Server?
    By Sup3rkirby in forum Programming Help
    Replies: 14
    Last Post: 12-26-2007, 04:14 PM
  5. Link Exchange
    By Conor in forum Scripts & 3rd Party Apps
    Replies: 117
    Last Post: 12-09-2007, 12:20 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
x10hosting free hosting for the masses
dedicated servers