Index: weblib.php
===================================================================
RCS file: /cvsroot/moodle/moodle/lib/weblib.php,v
retrieving revision 1.970.2.155
diff -u -r1.970.2.155 weblib.php
--- weblib.php 3 Dec 2009 08:00:00 -0000 1.970.2.155
+++ weblib.php 4 Dec 2009 01:33:20 -0000
@@ -2303,13 +2303,29 @@
* @param string $text Passed in by reference. The string to be searched for urls.
*/
function convert_urls_into_links(&$text) {
-/// Make lone URLs into links. eg http://moodle.com/
- $text = eregi_replace("([[:space:]]|^|\(|\[)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])",
- "\\1\\2://\\3\\4", $text);
-
-/// eg www.moodle.com
- $text = eregi_replace("([[:space:]]|^|\(|\[)www\.([^[:space:]]*)([[:alnum:]#?/&=])",
- "\\1www.\\2\\3", $text);
+
+ // Check once whether this PCRE build supports Unicode properties in regular expressions, and cache the answer in a static.
+ // TODO: this check should become an environment requirement in Moodle 2.0, since Unicode
+ // characters are expected to be officially allowed in URLs really soon (2010?).
+ // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
+ // Various ideas from: http://alanstorm.com/url_regex_explained
+ // Unicode check, negative assertion and other bits from Moodle. NOTE(review): each preg_replace() call below ends in "(?\\1" and passes only two arguments; the pattern tail and the replacement string appear to be missing from this patch text - confirm against the repository revision before applying.
+ static $unicoderegexp;
+ if (!isset($unicoderegexp)) {
+ $unicoderegexp = @preg_match('/\pL/u', 'a'); // Fails silently (the @ hides the warning), returning false, when \pL with the /u modifier is not supported.
+ }
+
+ if ($unicoderegexp) { // Unicode modifiers are available: use the \pL-based patterns (first http(s):// URLs, then bare www. hosts).
+ $text = preg_replace('#(((http(s?))://)(((([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text);
+ $text = preg_replace('#((www\.([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl])(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text);
+ } else { // Unicode modifiers are not available: fall back to the same two patterns written with ASCII [a-z0-9] classes.
+ $text = preg_replace('#(((http(s?))://)(((([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text);
+ $text = preg_replace('#((www\.([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z])(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text);
+ }
}
/**