From 07b3989052660c9770f2718bfdcde168f39e8030 Mon Sep 17 00:00:00 2001
From: Martin Langhoff <martin@catalyst.net.nz>
Date: Thu, 7 Feb 2008 18:00:47 +1300
Subject: [PATCH] staslib: stats_daily_cron() - small SQL improvements in activity & enrol SQL

This commit touches 3 elements

 - enrolments query

We need a separate query to deal fairly with the sitecourse.

 - activity query -

This SQL query is rather expensive mainly because of how it reads
role assignments to grab the "primary role". The "primary role"
SQL could be improved further to be faster and more correct.

 -  stats_get_action_names() replaces  stats_get_action_sql_in()

which simplifies things. It's only called once, so there's no point in
holding a cache, and it doesn't make any assumptions about what the
caller will do with it, so callers can use it with more freedom.
---
 lib/statslib.php |  179 ++++++++++++++++++++++++++++++++----------------------
 1 files changed, 107 insertions(+), 72 deletions(-)

diff --git a/lib/statslib.php b/lib/statslib.php
index 94e3f48..a06d173 100644
--- a/lib/statslib.php
+++ b/lib/statslib.php
@@ -80,8 +80,16 @@ function stats_cron_daily($maxdays=1) {
     delete_records_select('stats_daily',      "timeend > $timestart");
     delete_records_select('stats_user_daily', "timeend > $timestart");
 
-    $viewactions1 = stats_get_action_sql_in('view', 'l1');
-    $postactions2 = stats_get_action_sql_in('post', 'l2');
+    // Read in a few things we'll use later
+    $primary_roles = sql_primary_role_subselect();  // In dmllib.php
+    $viewactions = implode(',', stats_get_action_names('view'));
+    $postactions = implode(',', stats_get_action_names('post'));
+
+    // ... including default front page role id...
+    $defaultfprole = $CFG->defaultcourseroleid;
+    if (!empty($CFG->defaultfrontpageroleid)) { // 1.9 only, so far
+        $defaultfprole = $CFG->defaultfrontpageroleid;
+    }
 
     mtrace("Running daily statistics gathering, starting at $timestart...");
 
@@ -136,29 +144,55 @@ function stats_cron_daily($maxdays=1) {
         $timers[] = time() - $start;
         $start = time();
 
-    /// current enrolments - we do not know how many users were registered at given times in history :-(
+        // Enrolments vs active users
+        //
+        // Unfortunately, we do not know how many users were registered
+        // at given times in history :-(
+        // - stat1: enrolled users
+        // - stat2: active users
+        // - This is not "exact" because it follows a pre-roles metaphor.
+        //   When we look at the activity here, we only look at activity
+        //   from formally enrolled users (because of the LEFT OUTER JOIN).
+        //   This means that default-enrolment access isn't counted.
+        //   We could do a purely outer join here, I suspect, but it's not
+        //   easily portable (IIRC) and the participation numbers will look funny.
+        // - SITEID is specialcased here, because it's all about default enrolment
+        //   in that case, we'll count non-deleted users.
+        //
         if ($timestart > time() - 60*60*24*20) { // 20 days back maximum
 
             //TODO: 1/ add ra timestart and timeend coditions
             //      2/ add parent contexts
-
-            $sql = "INSERT INTO {$CFG->prefix}stats_daily (stattype, timeend, courseid, roleid, stat1, stat2)
-
-                    SELECT 'enrolments' AS stattype, $nextmidnight AS timeend, c.instanceid as courseid, r.id as roleid,
-                           (SELECT COUNT('x')
-                              FROM {$CFG->prefix}role_assignments ra1
-                             WHERE ra1.roleid = r.id AND ra1.contextid = c.id) AS stat1,
-                           (SELECT COUNT('x')
-                              FROM {$CFG->prefix}role_assignments ra2
-                             WHERE ra2.roleid = r.id AND ra2.contextid = c.id
-                                   AND EXISTS (SELECT 'x'
-                                                 FROM {$CFG->prefix}log l
-                                                WHERE l.course = c.instanceid AND l.userid = ra2.userid AND $timesql)) AS stat2
-                      FROM {$CFG->prefix}role r, {$CFG->prefix}context c
-                     WHERE c.contextlevel=".CONTEXT_COURSE."
+            $sql = "INSERT INTO {$CFG->prefix}stats_daily
+                      (courseid, roleid, timeend, stattype, stat1, stat2)
+                    SELECT prs.courseid, prs.primary_roleid,
+                               $nextmidnight, 'enrolments',
+                               COUNT(DISTINCT prs.userid), COUNT(DISTINCT l.userid)
+                    FROM ($primary_roles) prs
+                    LEFT OUTER JOIN {$CFG->prefix}log l
+                      ON (prs.courseid=l.course AND prs.userid=l.userid)
+                    WHERE prs.courseid != ".SITEID." AND (l.id IS NULL OR ($timesql))
+                    GROUP BY prs.courseid, prs.primary_roleid
                     HAVING stat1 > 0 OR stat2 > 0";
+            execute_sql($sql, false);
 
+            $sql = "INSERT INTO {$CFG->prefix}stats_daily
+                          (courseid, roleid, timeend, stattype, stat1, stat2)
+                    SELECT ctx.instanceid, COALESCE(ra.roleid, {$defaultfprole}),
+                               $nextmidnight, 'enrolments',
+                               (SELECT COUNT(id) FROM {$CFG->prefix}user where deleted=0),
+                               COUNT(DISTINCT l.userid)
+                    FROM {$CFG->prefix}user u
+                    JOIN {$CFG->prefix}context ctx
+                      ON (ctx.instanceid = ".SITEID." AND ctx.contextlevel=".CONTEXT_COURSE.")
+                    LEFT OUTER JOIN {$CFG->prefix}role_assignments ra
+                      ON (ctx.id=ra.contextid AND u.id=ra.userid)
+                    LEFT OUTER JOIN {$CFG->prefix}log l
+                      ON (l.course=".SITEID." AND u.id=l.userid)
+                    WHERE u.deleted=0 AND (l.id IS NULL OR ($timesql))
+                    GROUP BY ctx.instanceid,ra.roleid";
             execute_sql($sql, false);
+
             $timers[] = time() - $start;
             $start = time();
         } else {
@@ -166,31 +200,41 @@ function stats_cron_daily($maxdays=1) {
         }
 
 
-    /// how many view actions in each course+role
-    //TODO: 1/ add ra timestart and timeend coditions
-    //      2/ add parent contexts
-        $sql = "INSERT INTO {$CFG->prefix}stats_daily (stattype, timeend, courseid, roleid, stat1, stat2)
-
-                SELECT 'activity' AS stattype, $nextmidnight AS timeend, c.instanceid as courseid, r.id as roleid,
-                       (SELECT COUNT(DISTINCT(l1.id))
-                          FROM {$CFG->prefix}role_assignments ra1
-                               INNER JOIN {$CFG->prefix}log l1 ON l1.userid = ra1.userid
-                         WHERE ra1.roleid = r.id AND ra1.contextid = c.id AND l1.course = c.instanceid
-                               $viewactions1 AND $timesql1) AS stat1,
-                       (SELECT COUNT(DISTINCT(l2.id))
-                          FROM {$CFG->prefix}role_assignments ra2
-                               INNER JOIN {$CFG->prefix}log l2 ON l2.userid = ra2.userid
-                         WHERE ra2.roleid = r.id AND ra2.contextid = c.id AND l2.course = c.instanceid
-                               $postactions2 AND $timesql2) AS stat2
-                  FROM {$CFG->prefix}role r, {$CFG->prefix}context c
-                 WHERE c.contextlevel=".CONTEXT_COURSE."
-                HAVING stat1 > 0 OR stat2 > 0";
-
+        // Activities
+        //
+        // - stat1 stores 'view' actions
+        // - stat2 stores 'post' actions
+        // - pl stands for 'parsed log' or processed log
+        // - avoid "view" - may be a reserved word
+        //
+        // Very heavy query - on large sites, we have seen it
+        // take around 30s on Pg 80s and 4 minutes on MySQL
+        //
+        $sql = "INSERT INTO {$CFG->prefix}stats_daily
+                (timeend, stattype, courseid, roleid, stat1, stat2)
+                SELECT $nextmidnight, 'activity',
+                       pl.course, pl.roleid,
+                       SUM(pl.viewaction), SUM(pl.postaction)
+                FROM (
+                  SELECT l.course, COALESCE(prs.primary_roleid, {$CFG->defaultcourseroleid}) AS roleid,
+                         CASE
+                           WHEN l.action IN ($viewactions) THEN 1
+                           ELSE 0
+                         END AS viewaction,
+                         CASE
+                           WHEN l.action IN ($postactions) THEN 1
+                           ELSE 0
+                         END AS postaction
+                  FROM {$CFG->prefix}log l
+                  LEFT OUTER JOIN ($primary_roles) prs
+                    ON (l.userid=prs.userid AND l.course=prs.courseid)
+                  WHERE $timesql
+                ) as pl
+                GROUP BY pl.course,pl.roleid";
         execute_sql($sql, false);
         $timers[] = time() - $start;
         $start = time();
 
-
     /// how many view/post actions in each course total
         $sql = "INSERT INTO {$CFG->prefix}stats_daily (stattype, timeend, courseid, roleid, stat1, stat2)
 
@@ -198,11 +242,11 @@ function stats_cron_daily($maxdays=1) {
                        (SELECT COUNT(DISTINCT(l1.id))
                           FROM {$CFG->prefix}log l1
                          WHERE l1.course = c.id
-                               $viewactions1 AND $timesql1) AS stat1,
+                               l1.action IN ($viewactions) AND $timesql1) AS stat1,
                        (SELECT COUNT(DISTINCT(l2.id))
                           FROM {$CFG->prefix}log l2
                          WHERE l2.course = c.id
-                               $postactions2 AND $timesql2) AS stat2
+                               AND l2.action IN ($postactions) AND $timesql2) AS stat2
                   FROM {$CFG->prefix}course c
                  WHERE EXISTS (SELECT 'x'
                                  FROM {$CFG->prefix}log l
@@ -221,11 +265,11 @@ function stats_cron_daily($maxdays=1) {
                        (SELECT COUNT('x')
                           FROM {$CFG->prefix}log l1
                          WHERE $timesql1 AND l1.userid = d.userid AND
-                               l1.course = d.courseid $viewactions1 AND $timesql1) AS statsreads,
+                               l1.course = d.courseid AND l1 IN ($viewactions) AND $timesql1) AS statsreads,
                        (SELECT COUNT('x')
                           FROM {$CFG->prefix}log l2
                          WHERE $timesql2 AND l2.userid = d.userid AND
-                               l2.course = d.courseid $postactions2 AND $timesql2) AS statswrites
+                               l2.course = d.courseid AND l2.action IN ($postactions) AND $timesql2) AS statswrites
                   FROM (SELECT DISTINCT u.id AS userid, c.id AS courseid
                           FROM {$CFG->prefix}user u, {$CFG->prefix}course c, {$CFG->prefix}log l
                          WHERE u.id = l.userid AND c.id = l.course AND $timesql) d
@@ -848,41 +892,32 @@ function stats_get_post_actions() {
     return array('add','delete','edit','add mod','delete mod','edit section'.'enrol','loginas','new','unenrol','update','update mod');
 }
 
-function stats_get_action_sql_in($str, $alias='l') {
+function stats_get_action_names($str) {
     global $CFG;
-
-    static $cache = array();
-    static $mods = null;
-
-    if (!isset($cache[$str])) {
-        if (!isset($mods)) {
-            $mods = get_records('modules', 'visible', 1);
-            foreach ($mods as $mod) {
-                $file = $CFG->dirroot.'/mod/'.$mod->name.'/lib.php';
-                if (!is_readable($file)) {
-                    continue;
-                }
-                include_once($file);
-            }
+    
+    $mods = get_records('modules');
+    $function = 'stats_get_'.$str.'_actions';
+    $actions = $function();
+    foreach ($mods as $mod) {
+        $file = $CFG->dirroot.'/mod/'.$mod->name.'/lib.php';
+        if (!is_readable($file)) {
+            continue;
         }
-        $function = 'stats_get_'.$str.'_actions';
-        $actions = $function();
-        foreach ($mods as $mod) {
-            $function = $mod->name.'_get_'.$str.'_actions';
-            if (function_exists($function)) {
-                $actions = array_merge($actions, $function());
-            }
+        require_once($file);
+        $function = $mod->name.'_get_'.$str.'_actions';
+        if (function_exists($function)) {
+            $actions = array_merge($actions,$function());
         }
-        $cache[$str] = array_unique($actions);
     }
 
-    if (empty($cache[$str])) {
-        return " ";
-    } else if (count($cache[$str]) == 1) {
-        return " AND $alias.action = ".reset($cache[$str])." ";
-    } else {
-        return " AND $alias.action IN ('".implode("','", $cache[$str])."') ";
+    // The array_values() forces a stack-like array
+    // so we can later loop over safely...
+    $actions =  array_values(array_unique($actions));
+    $c = count($actions);
+    for ($n=0;$n<$c;$n++) {
+        $actions[$n] = "'" . $actions[$n] . "'"; // quote them for SQL
     }
+    return $actions;
 }
 
 function stats_get_time_options($now,$lastweekend,$lastmonthend,$earliestday,$earliestweek,$earliestmonth) {
-- 
1.5.4.rc1.1136.g2794f

