Dabr 里采用的 Autolink 库相当靠谱, 让我在写新浪微博应用的时候很是心动, 于是拿来改了改…除了修改 @中文人名、#标签# 等的匹配方式, 并处理了 URL 匹配结果末尾多出左方括号 [ 的问题(因为新浪微博上许多人发 URL 后会紧跟一个表情- -), 还增加了 email 匹配功能 🙂
请注意: 此代码继承原作者的 Apache License v2进行授权.
autoLinkUsernamesAndLists($this->autoLinkURLs($this->autoLinkHashtags($this->autoLinkEmail($tweet))));
}
/**
 * Scans the tweet for #tag# style hashtags: 1-20 characters out of a-z, 0-9,
 * "-", "_", "/" or CJK ideographs U+4E00..U+9FA5, enclosed in "#" on both
 * sides (case-insensitive, UTF-8 mode).
 *
 * NOTE(review): the replacement re-emits the three captured groups unchanged,
 * so matching text comes back as-is; presumably link markup belongs around
 * them -- confirm against the upstream version.
 *
 * @param string $tweet Text to scan.
 * @return string The processed text, or $tweet untouched when PCRE fails.
 */
public function autoLinkHashtags($tweet) {
    // TODO Match latin chars with accents
    $linked = preg_replace(
        '$([##])([a-z0-9\-_\/\x{4e00}-\x{9fa5}]{1,20})([##])$iu',
        '${1}${2}${3}',
        $tweet
    );
    // preg_replace() yields NULL on a PCRE error (for instance malformed UTF-8
    // under the /u modifier); fall back to the input rather than lose the tweet.
    return $linked === null ? $tweet : $linked;
}
/**
 * Scans the tweet for e-mail-like tokens: a local part of letters, digits,
 * "_" or ".", a separator ("@", "#" or the literal "[at]"), then a dotted
 * host name ending in a 2-4 letter TLD.
 *
 * The separator class used to be written "[@|@|#]"; inside a character class
 * "|" is a literal pipe, not alternation, so "user|host.com" was accepted as
 * an address. The class now lists only the intended separators.
 *
 * NOTE(review): the replacement re-emits the captured groups unchanged, so
 * matching text comes back as-is; presumably link markup belongs around
 * them -- confirm against the upstream version.
 *
 * @param string $tweet Text to scan.
 * @return string The processed text, or $tweet untouched when PCRE fails.
 */
public function autoLinkEmail($tweet) {
    $linked = preg_replace(
        '/([a-zA-Z0-9_\.]+)([@#]|\[at\])([a-zA-Z0-9_]+([.][a-zA-Z0-9_]+)*[.][a-zA-Z]{2,4})/',
        '${1}${2}${3}',
        $tweet
    );
    // preg_replace() returns NULL on a PCRE error; keep the input in that case.
    return $linked === null ? $tweet : $linked;
}
/**
 * Finds http(s):// and www. URLs in the tweet and passes every match to
 * replacementURLs(), whose return value replaces the matched text.
 *
 * Capture groups of the assembled pattern:
 *   1 total match, 2 preceding character, 3 URL, 4 protocol or "www.",
 *   5 domain(s) with optional port, 6 path, 7 query string.
 *
 * @param string $tweet Text to scan.
 * @return string The processed text, or $tweet untouched when PCRE fails.
 */
public function autoLinkURLs($tweet) {
    $URL_VALID_PRECEEDING_CHARS = "(?:[^/\"':!=]|^|\\:)";
    $URL_VALID_DOMAIN = "(?:[\\.-]|[^\\p{P}\\s])+\\.[a-z]{2,}(?::[0-9]+)?";
    $URL_VALID_URL_PATH_CHARS = "[a-z0-9!\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~@]";
    // Valid end-of-path characters (so /foo. does not gobble the period).
    // 1. Allow ) for Wikipedia URLs.
    // 2. Allow =&# for empty URL parameters and other URL-join artifacts
    $URL_VALID_URL_PATH_ENDING_CHARS = "[a-z0-9\\)=#/]";
    $URL_VALID_URL_QUERY_CHARS = "[a-z0-9!\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~]";
    $URL_VALID_URL_QUERY_ENDING_CHARS = "[a-z0-9_&=#]";
    // "$" is the pattern delimiter, which is why the character classes above
    // escape it as \$.
    $VALID_URL_PATTERN_STRING = '$(' . // $1 total match
        "(" . $URL_VALID_PRECEEDING_CHARS . ")" . // $2 Preceding character
        "(" . // $3 URL
        "(https?://|www\\.)" . // $4 Protocol or beginning
        "(" . $URL_VALID_DOMAIN . ")" . // $5 Domain(s) and optional port number
        "(/" . $URL_VALID_URL_PATH_CHARS . "*" . // $6 URL Path
        $URL_VALID_URL_PATH_ENDING_CHARS . "?)?" .
        "(\\?" . $URL_VALID_URL_QUERY_CHARS . "*" . // $7 Query String
        $URL_VALID_URL_QUERY_ENDING_CHARS . ")?" .
        ")" .
        ')$i';
    // Bind the callback to this object, as autoLinkUsernamesAndLists() does.
    // Naming the class instead is a static-style reference to a private,
    // non-static method and does not reliably resolve (e.g. for subclasses).
    $linked = preg_replace_callback(
        $VALID_URL_PATTERN_STRING,
        array($this, 'replacementURLs'),
        $tweet
    );
    // preg_replace_callback() returns NULL on a PCRE error; keep the input then.
    return $linked === null ? $tweet : $linked;
}
/**
 * Callback used by autoLinkURLs.
 *
 * Rebuilds the matched text from the preceding character ($matches[2]) and
 * the URL ($matches[3]). A "[" at the very end of the URL is split off and
 * appended after it, so it is kept out of the URL itself.
 *
 * URLs with an explicit http:// or https:// scheme and bare www. URLs are
 * emitted identically.
 * NOTE(review): nothing is wrapped around the URL here; presumably link
 * markup belongs around it -- confirm against the upstream version.
 *
 * @param array $matches Capture groups delivered by preg_replace_callback().
 * @return string Replacement text for the whole match.
 */
private function replacementURLs($matches) {
    $url = $matches[3];
    $trailing = "";

    // Workaround for [smile]: a "[" right after a URL gets swallowed by the
    // path pattern, so peel it off and put it back behind the URL.
    if (substr($url, -1, 1) == "[") {
        $url = substr($url, 0, -1);
        $trailing = "[";
    }

    return $matches[2] . $url . $trailing;
}
/**
 * Finds @mentions (optionally followed by a "/list" slug) and passes every
 * match to replacementUsernameAndLists().
 *
 * A mention must start the text or follow a character other than a-z, 0-9
 * or "_"; the name is 1-20 characters out of a-z, 0-9, "-", "_" or CJK
 * ideographs U+4E00..U+9FA5 (case-insensitive, UTF-8 mode).
 *
 * The marker class used to be written "[@|@]"; inside a character class "|"
 * is a literal pipe, so "|name" was picked up as a mention. Only "@" is
 * accepted now; it stays in its own capture group so group numbers are
 * unchanged for the callback.
 *
 * @param string $tweet Text to scan.
 * @return string The processed text, or $tweet untouched when PCRE fails.
 */
public function autoLinkUsernamesAndLists($tweet) {
    $linked = preg_replace_callback(
        '$([^a-z0-9_]|^)(@)([a-z0-9\-_\x{4e00}-\x{9fa5}]{1,20})(/[a-z][a-z0-9\x80-\xFF-]{0,79})?$iu',
        array($this, 'replacementUsernameAndLists'),
        $tweet
    );
    // preg_replace_callback() returns NULL on a PCRE error (for instance
    // malformed UTF-8 under /u); fall back to the input rather than lose it.
    return $linked === null ? $tweet : $linked;
}
/**
 * Callback used by autoLinkUsernamesAndLists.
 *
 * Concatenates the leading character ($matches[1]), the mention marker
 * ($matches[2]) and the user name ($matches[3]); when a list slug was
 * captured ($matches[4]) it is appended as well.
 *
 * NOTE(review): nothing is wrapped around the mention here; presumably link
 * markup belongs around it -- confirm against the upstream version.
 *
 * @param array $matches Capture groups delivered by preg_replace_callback().
 * @return string Replacement text for the whole match.
 */
private function replacementUsernameAndLists($matches) {
    $pieces = array($matches[1], $matches[2], $matches[3]);

    // Group 4 is only present when the mention was followed by a list slug.
    if (isset($matches[4])) {
        $pieces[] = $matches[4];
    }

    return implode('', $pieces);
}
/**
 * Returns the value of the BASE_URL constant.
 *
 * NOTE(review): BASE_URL is not defined in the visible part of this file;
 * presumably it is the application's root URL -- confirm where it is set.
 */
private function get_base()
{
return BASE_URL;
}
}
?>
这个文件最近又更新过一回了, 貌似是重写了……
唔..我的改动比较大,所以不怎么想动了= =||||||
沙发,马克~
惊现野生奶瓶, 围观+膜拜Orz…