// +----------------------------------------------------------------------
// | Ubb.php 2013-04-03
// +----------------------------------------------------------------------

namespace org\parser\driver;

/**
 * Converts UBB (BBCode-style) markup into HTML.
 *
 * Every rule in $ubb is applied to the whole content in declaration order, so
 * the output of an earlier rule is seen by the later ones. Paired tags may be
 * nested inside themselves.
 *
 * NOTE(review): only the characters that could break out of a double-quoted
 * attribute are escaped in URLs. Tag bodies are emitted verbatim and URL
 * schemes (e.g. "javascript:") are not filtered, so callers must sanitise
 * untrusted input before or after parsing.
 */
class Ubb
{
    /**
     * UBB tag matching rules.
     *
     * A four-element rule describes a paired tag:
     *   [html tag, opening-tag regex, closing-tag regex, handler].
     * A three-element rule describes a self-closing tag:
     *   [html tag, regex, handler].
     * The handler name is prefixed with "_" to obtain the rendering method.
     *
     * @var array
     */
    private $ubb = [
        ['table', '\[table(?:=([\d%]*))?\]', '\[\/table\]', 'width'],
        ['tr', '\[tr\]', '\[\/tr\]', 'tag'],
        ['th', '\[th(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/th\]', 'widthAndHeight'],
        ['td', '\[td(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/td\]', 'widthAndHeight'],
        ['img', '\[img(?:=([\d%]*)(?:,([\d%]*))?)?\]', '\[\/img\]', 'imgWidthAndHeight'],
        ['img', '\[img=(.*?)(?:,([\d%]*)(?:,([\d%]*))?)?\/\]', 'img'],
        ['a', '\[url(?:=(.*?)(?:,([\w\-]*))?)?\]', '\[\/url\]', 'urlClass'],
        ['a', '\[a(?:=(.*?)(?:,([\w\-]*))?)?\]', '\[\/a\]', 'urlClass'],
        ['a', '\[url=(.*?)(?:,([\w\-]*))?\/\]', 'url'],
        ['a', '\[a=(.*?)(?:,([\w\-]*))?\/\]', 'url'],
        ['a', '\[email(?:=([\w\-]*))?\]', '\[\/email\]', 'emailClass'],
        ['ul', '\[ul(?:=([\w\-]*))?\]', '\[\/ul\]', 'class'],
        ['ol', '\[ol(?:=([\w\-]*))?\]', '\[\/ol\]', 'class'],
        ['li', '\[li(?:=([\w\-]*))?\]', '\[\/li\]', 'class'],
        ['span', '\[span(?:=([\w\-]*))?\]', '\[\/span\]', 'class'],
        ['div', '\[div(?:=([\w\-]*))?\]', '\[\/div\]', 'class'],
        ['p', '\[p(?:=([\w\-]*))?\]', '\[\/p\]', 'class'],
        ['strong', '\[b\]', '\[\/b\]', 'tag'],
        ['strong', '\[strong\]', '\[\/strong\]', 'tag'],
        ['i', '\[i\]', '\[\/i\]', 'tag'],
        ['em', '\[em\]', '\[\/em\]', 'tag'],
        ['sub', '\[sub\]', '\[\/sub\]', 'tag'],
        ['sup', '\[sup\]', '\[\/sup\]', 'tag'],
        ['pre', '\[code(?:=([a-z#\+\/]*))?\]', '\[\/code\]', 'code'],
        ['code', '\[line(?:=([a-z#\+\/]*))?\]', '\[\/line\]', 'code'],
    ];

    /**
     * Parse UBB code into HTML.
     *
     * @param  string $content UBB code to parse
     * @return string resulting HTML, with newlines converted by nl2br()
     */
    public function parse($content = '')
    {
        // Only a genuinely empty value short-circuits: empty() would also
        // discard the perfectly valid content "0".
        if (is_array($content) || '' === (string) $content) {
            return '';
        }
        $content = (string) $content;

        foreach ($this->ubb as $rule) {
            if (4 === count($rule)) {
                // Paired tag: [tag]...[/tag]
                $content = $this->closeTag($content, $rule);
            } else {
                // Self-closing tag: [tag=.../]
                $content = $this->onceTag($content, $rule);
            }
        }

        return nl2br($content);
    }

    /**
     * Parse a paired tag, with support for nesting.
     *
     * The method plays two roles. Called with a string it is the entry point
     * for one rule; called with an array it is the preg_replace_callback()
     * callback for a single match. The active rule is kept in static variables
     * so the callback can see it.
     *
     * @param  string|array $data content to parse, or the match array when used as callback
     * @param  array|string $rule parsing rule (only used for the string form)
     * @return string parsed content
     */
    private function closeTag($data, $rule = '')
    {
        static $tag, $reg, $func, $count = 0;

        if (is_string($data)) {
            list($tag, $reg[0], $reg[1], $func) = $rule;
            $pattern = "/({$reg[0]})(.*?)({$reg[1]})/is";

            // A pass that met a nested tag sets $count, which requests one
            // more pass over the partially rendered text.
            do {
                $parsed = preg_replace_callback($pattern, [$this, 'closeTag'], $data);
                if (null === $parsed) {
                    // PCRE failure: keep what we have instead of losing the content.
                    $count = 0;
                    break;
                }
                $data = $parsed;
            } while ($count && $count--);

            return $data;
        } elseif (is_array($data)) {
            $num     = count($data);
            $content = $num - 2; // index of the tag body; the last group is the closing tag

            if (preg_match("/{$reg[0]}/is", $data[$content])) {
                // The lazy match stopped at the closing tag of an inner element:
                // render that inner element now and leave the outer opening tag
                // in place for the next pass.
                $count = 1;
                $inner = $data[$content] . $data[$num - 1];
                $parsed = preg_replace_callback(
                    "/({$reg[0]})(.*?)({$reg[1]})/is",
                    [$this, 'closeTag'],
                    $inner
                );

                return $data[1] . (null === $parsed ? $inner : $parsed);
            }

            // No nesting: render the element directly.
            $parse          = '_' . $func;
            $data[$content] = trim($data[$content], "\r\n"); // strip line breaks around the body

            return $this->$parse($tag, $data);
        }
    }

    /**
     * Parse a self-closing tag.
     *
     * @param  string $data content to parse
     * @param  array  $rule parsing rule [html tag, regex, handler]
     * @return string parsed content
     */
    private function onceTag($data, $rule = '')
    {
        list($tag, $reg, $func) = $rule;
        $parsed = preg_replace_callback("/{$reg}/is", [$this, '_' . $func], $data);

        // On a PCRE failure keep the unparsed content rather than returning null.
        return null === $parsed ? $data : $parsed;
    }

    /**
     * Render a self-closing img tag.
     *
     * @param  array $data match data: [1] - image URL, [2] - width, [3] - height
     * @return string rendered tag
     */
    private function _img($data)
    {
        $data[4] = $data[1];

        return $this->_imgWidthAndHeight('', $data);
    }

    /**
     * Render a self-closing url tag.
     *
     * @param  array $data match data: [1] - URL, [2] - class (absent when not given)
     * @return string rendered tag
     */
    private function _url($data)
    {
        // preg omits trailing unmatched groups, so the class may be missing.
        $class   = $this->group($data, 2);
        $data[2] = $data[1];
        $data[3] = $class;
        $data[4] = $data[1];

        return $this->_urlClass('', $data);
    }

    /**
     * Render a tag without attributes.
     *
     * @param  string $name tag name
     * @param  array  $data match data: [2] - tag body
     * @return string rendered tag
     */
    private function _tag($name, $data)
    {
        return "<{$name}>{$data[2]}</{$name}>";
    }

    /**
     * Render code.
     *
     * A "pre" rule produces a block (<pre><code>), any other rule an inline <code>.
     *
     * @param  string $name tag name
     * @param  array  $data match data: [2] - language, [3] - code
     * @return string rendered tag
     */
    private function _code($name, $data)
    {
        $lang = empty($data[2]) ? '' : " data-lang=\"{$data[2]}\"";
        $code = "<code{$lang}>{$data[3]}</code>";

        return ('pre' == $name) ? "<pre>{$code}</pre>" : $code;
    }

    /**
     * Render a tag that accepts a width attribute.
     *
     * @param  string $name tag name
     * @param  array  $data match data: [2] - width, [3] - tag body
     * @return string rendered tag
     */
    private function _width($name, $data)
    {
        $attrs = $this->attributes(['width' => $data[2]]);

        return "<{$name}{$attrs}>{$data[3]}</{$name}>";
    }

    /**
     * Render a tag that accepts width and height attributes.
     *
     * @param  string $name tag name
     * @param  array  $data match data: [2] - width, [3] - height, [4] - tag body
     * @return string rendered tag
     */
    private function _widthAndHeight($name, $data)
    {
        $attrs = $this->attributes(['width' => $data[2], 'height' => $data[3]]);

        return "<{$name}{$attrs}>{$data[4]}</{$name}>";
    }

    /**
     * Render an image tag that accepts width and height attributes.
     *
     * @param  string $name tag name (unused, the element is always img)
     * @param  array  $data match data: [2] - width, [3] - height, [4] - image URL
     * @return string rendered tag
     */
    private function _imgWidthAndHeight($name, $data)
    {
        $attrs = $this->attributes([
            'width'  => $this->group($data, 2),
            'height' => $this->group($data, 3),
        ]);
        $src = $this->escapeAttr($this->group($data, 4));

        return "<img{$attrs} src=\"{$src}\" />";
    }

    /**
     * Render a tag that accepts a class attribute.
     *
     * @param  string $name tag name
     * @param  array  $data match data: [2] - class, [3] - tag body
     * @return string rendered tag
     */
    private function _class($name, $data)
    {
        $attrs = $this->attributes(['class' => $data[2]]);

        return "<{$name}{$attrs}>{$data[3]}</{$name}>";
    }

    /**
     * Render a link that accepts a class attribute.
     *
     * When no URL is given the link text is used as the URL.
     *
     * @param  string $name tag name (unused, the element is always a)
     * @param  array  $data match data: [2] - URL, [3] - class, [4] - link text
     * @return string rendered tag
     */
    private function _urlClass($name, $data)
    {
        $url   = empty($data[2]) ? $data[4] : $data[2];
        $href  = $this->escapeAttr($url);
        $attrs = $this->attributes(['class' => $this->group($data, 3)]);

        return "<a href=\"{$href}\"{$attrs}>{$data[4]}</a>";
    }

    /**
     * Render an email link that accepts a class attribute.
     *
     * @param  string $name tag name (unused, the element is always a)
     * @param  array  $data match data: [2] - class, [3] - email address
     * @return string rendered tag, or the untouched source text for an invalid address
     */
    private function _emailClass($name, $data)
    {
        // Leave the markup untouched when the body is not a valid email address.
        if (!preg_match('/^\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$/', $data[3])) {
            return $data[0];
        }

        // Obfuscate the address to make harvesting harder.
        $email = $this->encodeEmailAddress($data[3]);
        $attrs = $this->attributes(['class' => $data[2]]);

        return "<a href=\"{$email[0]}\"{$attrs}>{$email[1]}</a>";
    }

    /**
     * Encode an email address as a mix of raw characters and decimal/hex HTML
     * entities, which defeats some harvesting tools.
     *
     * The encoding is deterministic for a given address.
     *
     * @param  string $addr email address
     * @return array encoded address: [0] - with "mailto:", [1] - without "mailto:"
     */
    private function encodeEmailAddress($addr)
    {
        $addr  = "mailto:" . $addr;
        $chars = str_split($addr);
        $seed  = (int) abs(crc32($addr) / strlen($addr)); // deterministic seed

        foreach ($chars as $key => $char) {
            $ord = ord($char);
            // Ignore non-ASCII bytes.
            if ($ord < 128) {
                $r = ($seed * (1 + $key)) % 100; // pseudo-random function
                // Roughly 10% raw, 45% hex, 45% decimal; '@' is always encoded.
                if ($r > 90 && '@' != $char) {
                    continue;
                }
                $chars[$key] = ($r < 45) ? '&#x' . dechex($ord) . ';' : '&#' . $ord . ';';
            }
        }

        $addr = implode('', $chars);
        $text = implode('', array_slice($chars, 7)); // text without "mailto:"

        return [$addr, $text];
    }

    /**
     * Build an attribute string (with a leading space per attribute) from the
     * given name => value pairs, skipping values that empty() treats as empty.
     *
     * @param  array $pairs attribute name => value
     * @return string e.g. ' width="10" height="20"', or '' when nothing is set
     */
    private function attributes(array $pairs)
    {
        $html = '';
        foreach ($pairs as $attr => $value) {
            if (!empty($value)) {
                $html .= " {$attr}=\"{$value}\"";
            }
        }

        return $html;
    }

    /**
     * Return a capture group, or '' when preg omitted it because it did not match.
     *
     * @param  array $data  match data
     * @param  int   $index group index
     * @return string
     */
    private function group(array $data, $index)
    {
        return isset($data[$index]) ? $data[$index] : '';
    }

    /**
     * Neutralise the characters that could terminate a double-quoted attribute
     * or open a tag. "&" is left alone so already-encoded input is not
     * double-encoded, and the replacement is byte-safe for any ASCII-compatible
     * encoding.
     *
     * @param  string $value raw attribute value
     * @return string escaped attribute value
     */
    private function escapeAttr($value)
    {
        return str_replace(['"', '<', '>'], ['&quot;', '&lt;', '&gt;'], $value);
    }
}