Index: weblib.php =================================================================== RCS file: /cvsroot/moodle/moodle/lib/weblib.php,v retrieving revision 1.970.2.155 diff -u -r1.970.2.155 weblib.php --- weblib.php 3 Dec 2009 08:00:00 -0000 1.970.2.155 +++ weblib.php 4 Dec 2009 01:33:20 -0000 @@ -2303,13 +2303,29 @@ * @param string $text Passed in by reference. The string to be searched for urls. */ function convert_urls_into_links(&$text) { -/// Make lone URLs into links. eg http://moodle.com/ - $text = eregi_replace("([[:space:]]|^|\(|\[)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])", - "\\1\\2://\\3\\4", $text); - -/// eg www.moodle.com - $text = eregi_replace("([[:space:]]|^|\(|\[)www\.([^[:space:]]*)([[:alnum:]#?/&=])", - "\\1www.\\2\\3", $text); + + // Check if we support unicode modifiers in regular expressions. Cache it. + // TODO: this check should be an environment requirement in Moodle 2.0, since unicode + // chars are going to arrive in URLs officially really soon (2010?) + // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/ + // Various ideas from: http://alanstorm.com/url_regex_explained + // Unicode check, negative assertion and other bits from Moodle. 
+ static $unicoderegexp; + if (!isset($unicoderegexp)) { + $unicoderegexp = @preg_match('/\pL/u', 'a'); // This will fail silently, returning false, when unicode modifiers are unsupported + } + + if ($unicoderegexp) { //We can use unicode modifiers + $text = preg_replace('#(((http(s?))://)(((([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + $text = preg_replace('#((www\.([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl])(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + } else { //We cannot use unicode modifiers + $text = preg_replace('#(((http(s?))://)(((([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + $text = preg_replace('#((www\.([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z])(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + } } /**