From 167c8216feb2a728ea9419bf30488b5cbc97a8cd Mon Sep 17 00:00:00 2001
From: Konrad du Plessis <konradzar@gmail.com>
Date: Fri, 24 Apr 2026 01:30:25 +0200
Subject: [PATCH] fix(perf): Coalesce project FK in adjustment aggregates
 (dedupe)

Spec-review catch on 61c485f: the batched GROUP BY aggregates for
unpaid-per-project and paid-per-project x month were running TWO
filtered queries and summing them in Python. Any adjustment with
BOTH project FK AND work_log.project set was double-counted.

Every Overtime adjustment fits that shape (price_overtime sets
both). So every unpaid Overtime was silently inflating the
outstanding-costs dashboard by its own amount, and every paid
Overtime inflated the Per-project-monthly-payroll stacked chart.

Fix: annotate Coalesce('project_id', 'work_log__project_id') so
each adjustment contributes to exactly one project (matches the
original Q(...) | Q(...) OR-filter semantics).

New regression test locks in the "count once" behaviour with an
Overtime adjustment that has both FKs set. Previously there was no
test covering the sum correctness of outstanding-costs - only
context-key presence.

Tests: 69/69. Query counts per tab: pending 24q / history 24q /
loans 25q / adjustments 32q (2 fewer per tab than 61c485f because
Coalesce folded two filtered queries into one).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 core/tests.py |  99 +++++++++++++++++++++++++++++++++++++
 core/views.py | 132 ++++++++++++++++++++++++++------------------------
 2 files changed, 168 insertions(+), 63 deletions(-)

diff --git a/core/tests.py b/core/tests.py
index 0e4ad8b..28ec2d7 100644
--- a/core/tests.py
+++ b/core/tests.py
@@ -1280,3 +1280,102 @@ class CacheBustTokenTests(TestCase):
             # Reset the module-level path constant so other tests (or reruns)
             # get the real CSS file back.
             cp._CSS_PATH_FOR_TOKEN = cp.Path(settings.BASE_DIR) / 'static' / 'css' / 'custom.css'
+
+
+# === REGRESSION: adjustment project-attribution double-count ===
+# Ticket summary: commit 61c485f replaced a per-project loop using SQL
+# OR (`Q(project=P) | Q(work_log__project=P)`) with two SEPARATE
+# filter+GROUP BY queries that were summed in Python. Any adjustment
+# with BOTH `project_id` AND `work_log.project_id` set matched both
+# queries and got counted TWICE.
+#
+# Every Overtime adjustment fits that shape — `price_overtime()` in
+# views.py sets both FKs to the same project. So every unpaid
+# Overtime silently inflated the outstanding-costs dashboard by its
+# own amount. This test locks in the "count once" behaviour.
+class PayrollDashboardAdjustmentAggregationTests(TestCase):
+    """Regression tests for the per-project adjustment aggregation used by
+    the payroll dashboard. An adjustment with both `project` and
+    `work_log.project` set must contribute its amount ONCE to that
+    project, not twice."""
+
+    def setUp(self):
+        # Admin so the payroll_dashboard view accepts us (is_admin helper
+        # returns True for is_staff OR is_superuser).
+        self.admin = User.objects.create_user(
+            username='double-count-admin',
+            password='pw',
+            is_staff=True,
+            is_superuser=True,
+        )
+        self.client.force_login(self.admin)
+
+        # One active project, one worker, one fresh work log.
+        self.project = Project.objects.create(
+            name='Double-Count Test Site',
+            start_date=datetime.date(2026, 4, 1),
+            active=True,
+        )
+        self.worker = Worker.objects.create(
+            name='DC Worker',
+            id_number='DC1',
+            monthly_salary=Decimal('10000'),
+        )
+        self.work_log = WorkLog.objects.create(
+            date=datetime.date(2026, 4, 23),
+            project=self.project,
+            supervisor=self.admin,
+        )
+        # NOTE: deliberately no workers on the log — we do NOT want
+        # unpaid-log wage cost to pollute this test; we only want to
+        # measure the adjustment contribution.
+
+        # THE KEY SHAPE: one unpaid adjustment with BOTH FKs set to the
+        # same project. This mirrors how price_overtime() creates every
+        # Overtime adjustment (adj.project = worklog.project, and
+        # adj.work_log = worklog).
+        PayrollAdjustment.objects.create(
+            worker=self.worker,
+            type='Overtime',       # additive → contributes positively
+            amount=Decimal('500'),
+            project=self.project,
+            work_log=self.work_log,
+            date=datetime.date(2026, 4, 23),
+            description='Regression-test Overtime',
+        )
+
+    def test_overtime_with_both_project_and_work_log_counts_once(self):
+        """An unpaid Overtime with BOTH project + work_log.project set
+        must contribute its R500 amount once to outstanding-costs —
+        not R1000.
+
+        Before the Coalesce fix, the batched aggregates summed the
+        direct-project group + the work_log-project group separately
+        in Python, so an Overtime adjustment landed in both and got
+        double-counted. After the fix (Coalesce('project_id',
+        'work_log__project_id')) each adjustment row is attributed to
+        exactly one effective project.
+        """
+        response = self.client.get(reverse('payroll_dashboard'))
+        self.assertEqual(response.status_code, 200)
+        outstanding = response.context['outstanding_project_costs']
+
+        # Find the entry for our test project by name (the shape is
+        # {'name': str, 'cost': Decimal} — no 'id' key).
+        ours = next(
+            (p for p in outstanding if p['name'] == self.project.name),
+            None,
+        )
+        self.assertIsNotNone(
+            ours,
+            "Test project should appear in outstanding_project_costs "
+            "(its unpaid Overtime is non-zero).",
+        )
+        self.assertEqual(
+            ours['cost'],
+            Decimal('500'),
+            "Overtime adjustment with both project + work_log.project "
+            "FKs set must count ONCE (R500), not twice (R1000). If "
+            "this fails with R1000 the project-attribution double-"
+            "count bug has reappeared.",
+        )
diff --git a/core/views.py b/core/views.py
index ebf19c9..2f1b9d7 100644
--- a/core/views.py
+++ b/core/views.py
@@ -12,7 +12,7 @@ from django.shortcuts import render, redirect, get_object_or_404
 from django.utils import timezone
 from django.db import transaction
 from django.db.models import Sum, Count, Q, F, Prefetch, Max, Min
-from django.db.models.functions import TruncMonth
+from django.db.models.functions import Coalesce, TruncMonth
 from django.contrib import messages
 from django.contrib.auth.decorators import login_required
 from django.http import JsonResponse, HttpResponseForbidden, HttpResponse
@@ -2797,37 +2797,42 @@ def payroll_dashboard(request):
             if w.id not in paid_worker_ids:
                 project_outstanding_map[log.project_id] += w.daily_rate
 
-    # --- 2. Unpaid-adjustment net per project (batched via two GROUP BYs) ---
-    # Each unpaid adjustment contributes to "its project" (direct FK) OR
-    # its work_log's project. We aggregate both sides and merge in Python.
-    def _sum_adj_by_project(qs, project_col):
-        # Sum adjustment amounts grouped by project_col (e.g. 'project_id'),
-        # separated by type family so we can apply add/subtract correctly.
-        rows = qs.values(project_col, 'type').annotate(total=Sum('amount'))
-        additive = {pid: Decimal('0.00') for pid in active_project_ids}
-        deductive = {pid: Decimal('0.00') for pid in active_project_ids}
-        for row in rows:
-            pid = row[project_col]
-            if pid not in additive:
-                continue
-            if row['type'] in ADDITIVE_TYPES:
-                additive[pid] += row['total']
-            elif row['type'] in DEDUCTIVE_TYPES:
-                deductive[pid] += row['total']
-        return additive, deductive
-
-    unpaid_adj_base = PayrollAdjustment.objects.filter(payroll_record__isnull=True)
-    unpaid_direct_add, unpaid_direct_sub = _sum_adj_by_project(
-        unpaid_adj_base.filter(project_id__in=active_project_ids),
-        'project_id',
+    # --- 2. Unpaid-adjustment net per project (ONE GROUP BY via Coalesce) ---
+    # Each unpaid adjustment is attributed to exactly ONE project: its
+    # direct FK (`project_id`) if set, otherwise its work_log's project.
+    # This mirrors the original OR-filter semantics
+    # (`Q(project=P) | Q(work_log__project=P)`) which naturally dedupes:
+    # a row with BOTH FKs pointing at the same project matches ONCE.
+    #
+    # Why Coalesce matters: every Overtime adjustment is created by
+    # price_overtime() with BOTH adj.project AND adj.work_log.project set
+    # to the same project. A naive "filter by direct FK + filter by
+    # work_log FK + sum both maps in Python" approach double-counts
+    # every Overtime row. Coalesce picks ONE effective project per row
+    # so each amount contributes to the outstanding-costs card once.
+    unpaid_adj_rows = (
+        PayrollAdjustment.objects
+        .filter(payroll_record__isnull=True)
+        .filter(
+            Q(project_id__in=active_project_ids)
+            | Q(work_log__project_id__in=active_project_ids)
+        )
+        .annotate(
+            effective_project_id=Coalesce('project_id', 'work_log__project_id')
+        )
+        .values('effective_project_id', 'type')
+        .annotate(total=Sum('amount'))
     )
-    unpaid_wl_add, unpaid_wl_sub = _sum_adj_by_project(
-        unpaid_adj_base.filter(work_log__project_id__in=active_project_ids),
-        'work_log__project_id',
-    )
-    for pid in active_project_ids:
-        project_outstanding_map[pid] += unpaid_direct_add[pid] + unpaid_wl_add[pid]
-        project_outstanding_map[pid] -= unpaid_direct_sub[pid] + unpaid_wl_sub[pid]
+    for row in unpaid_adj_rows:
+        pid = row['effective_project_id']
+        # Only contribute to active projects we're tracking.
+        if pid not in project_outstanding_map:
+            continue
+        total = row['total'] or Decimal('0.00')
+        if row['type'] in ADDITIVE_TYPES:
+            project_outstanding_map[pid] += total
+        elif row['type'] in DEDUCTIVE_TYPES:
+            project_outstanding_map[pid] -= total
 
     outstanding_project_costs = []
     for project in active_projects_list:
@@ -2873,35 +2878,38 @@ def payroll_dashboard(request):
         daily = Decimal(salary) / Decimal('20.00')
         project_month_wage[key] += daily * row['worker_count']
 
-    # --- 4. Per-project × per-month paid-adjustment net ---
-    paid_adj_base = PayrollAdjustment.objects.filter(
-        payroll_record__isnull=False,
-        date__gte=six_months_ago_date,
-    ).annotate(month=TruncMonth('date'))
-
-    def _sum_paid_adj_by_project_month(qs, project_col):
-        rows = qs.values(project_col, 'month', 'type').annotate(total=Sum('amount'))
-        add = {}
-        sub = {}
-        for row in rows:
-            pid = row[project_col]
-            if pid not in project_outstanding_map:  # only active projects
-                continue
-            key = (pid, row['month'].year, row['month'].month)
-            if row['type'] in ADDITIVE_TYPES:
-                add[key] = add.get(key, Decimal('0.00')) + row['total']
-            elif row['type'] in DEDUCTIVE_TYPES:
-                sub[key] = sub.get(key, Decimal('0.00')) + row['total']
-        return add, sub
-
-    paid_direct_add, paid_direct_sub = _sum_paid_adj_by_project_month(
-        paid_adj_base.filter(project_id__in=active_project_ids),
-        'project_id',
-    )
-    paid_wl_add, paid_wl_sub = _sum_paid_adj_by_project_month(
-        paid_adj_base.filter(work_log__project_id__in=active_project_ids),
-        'work_log__project_id',
+    # --- 4. Per-project × per-month paid-adjustment net (ONE GROUP BY) ---
+    # Same Coalesce trick as site #2: pick ONE effective project per
+    # adjustment row so we don't double-count the stacked-chart bars
+    # for Overtime (which always has both FKs pointing at the same
+    # project — see note on the unpaid block above).
+    paid_adj_rows = (
+        PayrollAdjustment.objects
+        .filter(payroll_record__isnull=False, date__gte=six_months_ago_date)
+        .filter(
+            Q(project_id__in=active_project_ids)
+            | Q(work_log__project_id__in=active_project_ids)
+        )
+        .annotate(
+            effective_project_id=Coalesce('project_id', 'work_log__project_id'),
+            month=TruncMonth('date'),
+        )
+        .values('effective_project_id', 'month', 'type')
+        .annotate(total=Sum('amount'))
     )
+    # Accumulate add/sub per (project, year, month) keys in Python.
+    paid_adj_add = {}
+    paid_adj_sub = {}
+    for row in paid_adj_rows:
+        pid = row['effective_project_id']
+        if pid not in project_outstanding_map:  # only active projects
+            continue
+        key = (pid, row['month'].year, row['month'].month)
+        total = row['total'] or Decimal('0.00')
+        if row['type'] in ADDITIVE_TYPES:
+            paid_adj_add[key] = paid_adj_add.get(key, Decimal('0.00')) + total
+        elif row['type'] in DEDUCTIVE_TYPES:
+            paid_adj_sub[key] = paid_adj_sub.get(key, Decimal('0.00')) + total
 
     project_chart_data = []
     for project in active_projects_list:
@@ -2909,10 +2917,8 @@ def payroll_dashboard(request):
         for y, m in chart_months:
             key = (project.id, y, m)
             month_cost = project_month_wage.get(key, Decimal('0.00'))
-            month_cost += paid_direct_add.get(key, Decimal('0.00'))
-            month_cost += paid_wl_add.get(key, Decimal('0.00'))
-            month_cost -= paid_direct_sub.get(key, Decimal('0.00'))
-            month_cost -= paid_wl_sub.get(key, Decimal('0.00'))
+            month_cost += paid_adj_add.get(key, Decimal('0.00'))
+            month_cost -= paid_adj_sub.get(key, Decimal('0.00'))
             monthly_data.append(float(month_cost))
         if any(v > 0 for v in monthly_data):
             project_chart_data.append({