为新浪微博修改的 Autolink.php

Dabr 里采用的 Autolink 库相当靠谱，让我在写新浪微博应用的时候很是心动，于是拿来改了改……除了修改 @中文人名、#标签# 等的匹配方式，以及处理 URL 匹配时末尾多出一个左方括号“[”的问题（因为新浪微博上许多人发完 URL 后会紧跟一个形如 [表情] 的表情 - -），还增加了 email 匹配功能 🙂

请注意：此代码沿用原作者的 Apache License v2 进行授权。

autoLinkUsernamesAndLists($this->autoLinkURLs($this->autoLinkHashtags($this->autoLinkEmail($tweet))));
    }

    /**
     * Runs the hashtag pattern over a tweet.
     *
     * The pattern matches a topic enclosed in '#' on both sides (for example
     * "#topic#") whose body is 1-20 characters taken from ASCII letters,
     * digits, '-', '_', '/' and the code points U+4E00..U+9FA5. Matching is
     * case-insensitive and runs in UTF-8 mode.
     *
     * NOTE(review): the replacement string is '${1}${2}${3}', which writes
     * each match back unchanged; link markup appears to be missing here --
     * confirm against the upstream library.
     *
     * @param string $tweet Text to scan.
     * @return string The processed text, or $tweet itself when PCRE reports
     *                an error.
     */
    public function autoLinkHashtags($tweet) {
        // TODO Match latin chars with accents
        $t = preg_replace('$([##])([a-z0-9\-_\/\x{4e00}-\x{9fa5}]{1,20})([##])$iu',
                          '${1}${2}${3}',
                          $tweet);

        // With the /u modifier preg_replace() returns null when the subject
        // is not valid UTF-8. Hand back the untouched input in that case
        // instead of silently dropping the whole tweet.
        return $t === null ? $tweet : $t;
    }
    
    /**
     * Runs the e-mail pattern over a tweet.
     *
     * Capture groups of the pattern:
     *   1. local part: letters, digits, '_' and '.'
     *   2. separator: one character from the class [@|@|#], or the literal
     *      text "[at]"
     *   3. domain: dot-separated labels ending in a 2-4 letter suffix
     *
     * NOTE(review): inside a character class '|' is a literal pipe, so
     * "name|example.com" also matches -- confirm whether that is intended.
     * NOTE(review): the replacement '${1}${2}${3}' writes each match back
     * unchanged; link markup appears to be missing -- confirm.
     *
     * @param string $tweet Text to scan.
     * @return string|null Result of preg_replace() on $tweet.
     */
    public function autoLinkEmail($tweet) {
        $emailPattern = '/([a-zA-Z0-9_\.]+)([@|@|#]|\[at\])([a-zA-Z0-9_]+([.][a-zA-Z0-9_]+)*[.][a-zA-Z]{2,4})/';
        $rewritten    = '${1}${2}${3}';

        return preg_replace($emailPattern, $rewritten, $tweet);
    }

    /**
     * Finds URLs in a tweet and passes each match to replacementURLs().
     *
     * The pattern is assembled from the fragments below and uses '$' as the
     * delimiter with the case-insensitive modifier. Capture groups:
     *   1. the total match
     *   2. the character preceding the URL
     *   3. the URL itself
     *   4. "http://", "https://" or "www."
     *   5. domain(s) and optional port number
     *   6. optional path
     *   7. optional query string
     *
     * NOTE(review): the path character class contains '[' and the trailing
     * path-ending class is optional, so a path can end on '[' or '.';
     * replacementURLs() compensates for the '[' case only -- confirm.
     *
     * @param string $tweet Text to scan.
     * @return string|null Result of preg_replace_callback() on $tweet.
     */
    public function autoLinkURLs($tweet) {
        $URL_VALID_PRECEEDING_CHARS = "(?:[^/\"':!=]|^|\\:)";
        $URL_VALID_DOMAIN = "(?:[\\.-]|[^\\p{P}\\s])+\\.[a-z]{2,}(?::[0-9]+)?";
        $URL_VALID_URL_PATH_CHARS = "[a-z0-9!\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~@]";
        // Intended end-of-path characters:
        //    1. Allow ) for Wikipedia URLs.
        //    2. Allow =&# for empty URL parameters and other URL-join artifacts
        $URL_VALID_URL_PATH_ENDING_CHARS = "[a-z0-9\\)=#/]";
        $URL_VALID_URL_QUERY_CHARS = "[a-z0-9!\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~]";
        $URL_VALID_URL_QUERY_ENDING_CHARS = "[a-z0-9_&=#]";
        $VALID_URL_PATTERN_STRING = '$(' .                      //  $1 total match
            "(" . $URL_VALID_PRECEEDING_CHARS . ")" .           //  $2 Preceding character
            "(" .                                               //  $3 URL
              "(https?://|www\\.)" .                            //  $4 Protocol or beginning
              "(" . $URL_VALID_DOMAIN . ")" .                   //  $5 Domain(s) and optional port number
              "(/" . $URL_VALID_URL_PATH_CHARS . "*" .          //  $6 URL Path
                     $URL_VALID_URL_PATH_ENDING_CHARS . "?)?" .
              "(\\?" . $URL_VALID_URL_QUERY_CHARS . "*" .       //  $7 Query String
                      $URL_VALID_URL_QUERY_ENDING_CHARS . ")?" .
            ")" .
          ')$i';

        // Bind the callback to this instance. replacementURLs() is a
        // non-static private method, so naming it through the class string
        // (get_class($this)) is a static-style callable that newer PHP
        // versions can reject; array($this, ...) also matches the form used
        // by autoLinkUsernamesAndLists().
        return preg_replace_callback($VALID_URL_PATTERN_STRING,
                                     array($this, 'replacementURLs'),
                                     $tweet);
    }

    /**
     * preg_replace_callback() handler used by autoLinkURLs.
     *
     * Rebuilds the text of one URL match from the preceding character
     * ($matches[2]) and the URL ($matches[3]). A '[' captured as the last
     * character of the URL is split off and appended after the URL.
     *
     * @param array $matches Capture groups of the URL pattern.
     * @return string Replacement text for the match.
     */
    private function replacementURLs($matches) {
        $url    = $matches[3];
        $suffix = "";

        // Workaround for [smile]: a '[' glued to the end of the URL is not
        // treated as part of it.
        if (substr($url, -1, 1) == "[") {
            $url    = substr($url, 0, -1);
            $suffix = "[";
        }

        $hasScheme = substr($url, 0, 7) == 'http://' || substr($url, 0, 8) == 'https://';

        // NOTE(review): both arms currently produce the same text; the
        // scheme check presumably once selected different link markup --
        // confirm against the upstream library.
        $linked = $hasScheme
            ? '' . $url . ''
            : '' . $url . '';

        return $matches[2] . $linked . $suffix;
    }

    /**
     * Finds @username and @username/list mentions in a tweet and passes
     * each match to replacementUsernameAndLists().
     *
     * Capture groups of the pattern (case-insensitive, UTF-8 mode):
     *   1. the character before the mention (anything except a-z, 0-9 and
     *      '_'), or the start of the text
     *   2. one character from the class [@|@]
     *   3. the name: 1-20 of a-z, 0-9, '-', '_' or U+4E00..U+9FA5
     *   4. optional "/list" part: '/', a letter, then up to 79 further
     *      characters
     *
     * NOTE(review): inside a character class '|' is a literal pipe, so
     * "|name" is matched like "@name" -- confirm whether that is intended.
     *
     * @param string $tweet Text to scan.
     * @return string The processed text, or $tweet itself when PCRE reports
     *                an error.
     */
    public function autoLinkUsernamesAndLists($tweet) {
        $linked = preg_replace_callback('$([^a-z0-9_]|^)([@|@])([a-z0-9\-_\x{4e00}-\x{9fa5}]{1,20})(/[a-z][a-z0-9\x80-\xFF-]{0,79})?$iu',
                                        array($this, 'replacementUsernameAndLists'),
                                        $tweet);

        // With the /u modifier preg_replace_callback() returns null when the
        // subject is not valid UTF-8. Hand back the untouched input in that
        // case instead of silently dropping the whole tweet.
        return $linked === null ? $tweet : $linked;
    }

    /**
     * preg_replace_callback() handler used by autoLinkUsernamesAndLists.
     *
     * Rebuilds the text of one mention: the preceding character
     * ($matches[1]), the '@' sign ($matches[2]), the name ($matches[3])
     * and, when present, the "/list" part ($matches[4]).
     *
     * NOTE(review): both arms wrap the mention in empty strings only, so the
     * match is written back unchanged; link markup appears to be missing --
     * confirm against the upstream library.
     *
     * @param array $matches Capture groups of the mention pattern.
     * @return string Replacement text for the match.
     */
    private function replacementUsernameAndLists($matches) {
        $mention = $matches[2] . $matches[3];

        // Group 4 is only set when the mention carries a "/list" part.
        $linked = isset($matches[4])
            ? '' . $mention . $matches[4] . ''   // username plus list
            : '' . $mention . '';                // username only

        return $matches[1] . $linked;
    }
    
    /**
     * Returns the value of the BASE_URL constant.
     *
     * BASE_URL is not defined in the visible part of this file, and no
     * visible method calls this helper -- NOTE(review): confirm where the
     * constant is defined and whether this accessor is still needed.
     *
     * @return mixed Whatever BASE_URL holds (presumably a URL string).
     */
    private function get_base()
    {
        return BASE_URL;
    }
}
?>

4 thoughts on “为新浪微博修改的 Autolink.php”

Leave a Reply

Your email address will not be published. Required fields are marked *

QR Code Business Card