diff --git a/core/tests.py b/core/tests.py index 8679c60..958696b 100644 --- a/core/tests.py +++ b/core/tests.py @@ -3901,3 +3901,38 @@ class WorkerReportBadParamsTests(TestCase): def test_csv_non_numeric_team_falls_back(self): resp = self.client.get(reverse('worker_batch_report_csv'), {'team': 'abc'}) self.assertEqual(resp.status_code, 200) + + +# ============================================================================= +# === AUDIT FINDING (Jun 2026) — WORKER REPORT LIFETIME TOTAL INFLATION === +# _build_worker_report_context computed Sum('payroll_records__amount_paid') +# in the SAME .annotate() as counts over work_logs and warnings. Django +# builds ONE query with all the joins, so each payroll record row is +# duplicated once per work-log row — the lifetime "Total Paid" column was +# multiplied by the worker's log count (3 logs + one R100 payslip showed +# R300). The distinct=True counts were immune, which is why it hid. +# ============================================================================= + +class WorkerReportLifetimeTotalsTests(TestCase): + """total_paid_lifetime must not be multiplied by the work-log join.""" + + def test_total_paid_not_inflated_by_work_log_joins(self): + from core.views import _build_worker_report_context + admin = User.objects.create_user(username='infl_admin', is_staff=True) + p = Project.objects.create(name='Infl Project') + w = Worker.objects.create( + name='Infl Worker', id_number='INF-1', + monthly_salary=Decimal('4000')) + for d in (1, 2, 3): # 3 logs → 3 join rows + wl = WorkLog.objects.create( + date=datetime.date(2026, 6, d), project=p, supervisor=admin) + wl.workers.add(w) + PayrollRecord.objects.create( + worker=w, amount_paid=Decimal('100.00'), + date=datetime.date(2026, 6, 4)) # ONE payslip of R100 + + rows = _build_worker_report_context() + row = next(r for r in rows if r['worker'].id == w.id) + self.assertEqual(row['total_paid_lifetime'], Decimal('100.00')) + self.assertEqual(row['days_worked'], 3) + self.assertEqual(row['payslip_count'], 1) diff --git a/core/views.py b/core/views.py index ec6d87e..517082c 100644 --- a/core/views.py +++ b/core/views.py @@ -1666,38 +1666,71 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None): and warning counts. All aggregates are computed in a single query via annotate/prefetch to avoid N+1 database hits. """ - workers = Worker.objects.all() + base = Worker.objects.all() if status == 'active': - workers = workers.filter(active=True) + base = base.filter(active=True) elif status == 'inactive': - workers = workers.filter(active=False) + base = base.filter(active=False) if project_id: - workers = workers.filter(work_logs__project_id=project_id).distinct() + base = base.filter(work_logs__project_id=project_id).distinct() if team_id: - workers = workers.filter(teams__id=team_id).distinct() + base = base.filter(teams__id=team_id).distinct() - workers = workers.annotate( - _days_worked=Count('work_logs__date', distinct=True), + # === AGGREGATION SAFETY (audit fix, Jun 2026) === + # All four annotations below ride the SAME payroll_records join, so + # they can't multiply each other. The previous version also folded + # work_logs and warnings counts into this one annotate — Django then + # builds a single query joining ALL the relations, and every payroll + # row is duplicated once per work-log row. Result: the lifetime + # "Total Paid" Sum was multiplied by the worker's log count (the + # distinct=True counts were immune, which is why it went unnoticed). + # work_logs / warnings / projects are aggregated in their own batched + # GROUP BY queries below — same numbers, no shared-join cross product, + # and no per-worker queries inside the loop either. + workers = base.annotate( _first_payslip_date=Min('payroll_records__date'), _last_payslip_date=Max('payroll_records__date'), _total_paid_lifetime=Sum('payroll_records__amount_paid'), _payslip_count=Count('payroll_records', distinct=True), - _active_warnings=Count('warnings', distinct=True), - ).order_by('name') + ).prefetch_related('teams', 'certificates').order_by('name') + + worker_id_qs = base.values('id') + + # Days worked per worker (distinct dates across their work logs) + days_by_worker = dict( + WorkLog.objects.filter(workers__id__in=worker_id_qs) + .values('workers__id') + .annotate(days=Count('date', distinct=True)) + .values_list('workers__id', 'days') + ) + # Warning count per worker + warnings_by_worker = dict( + WorkerWarning.objects.filter(worker_id__in=worker_id_qs) + .values('worker_id') + .annotate(n=Count('id')) + .values_list('worker_id', 'n') + ) + # Distinct project names each worker has appeared on + projects_by_worker = {} + for wid, pname in ( + WorkLog.objects.filter(workers__id__in=worker_id_qs) + .values_list('workers__id', 'project__name') + .distinct() + .order_by('project__name') + ): + projects_by_worker.setdefault(wid, []).append(pname) today = timezone.localdate() thirty_days_out = today + datetime.timedelta(days=30) rows = [] for w in workers: - projects = list( - Project.objects.filter(work_logs__workers=w).distinct().values_list('name', flat=True) - ) - teams = list(w.teams.values_list('name', flat=True)) + projects = projects_by_worker.get(w.id, []) + teams = [t.name for t in w.teams.all()] # prefetched — no query - certs = w.certificates.all() - certs_total = certs.count() + certs = list(w.certificates.all()) # prefetched — no query + certs_total = len(certs) certs_active = 0 certs_expiring = 0 certs_expired = 0 @@ -1716,7 +1749,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None): 'worker': w, 'projects': projects, 'teams': teams, - 'days_worked': w._days_worked or 0, + 'days_worked': days_by_worker.get(w.id, 0), 'first_payslip_date': w._first_payslip_date, 'last_payslip_date': w._last_payslip_date, 'total_paid_lifetime': w._total_paid_lifetime or Decimal('0.00'), @@ -1725,7 +1758,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None): 'certs_active': certs_active, 'certs_expiring': certs_expiring, 'certs_expired': certs_expired, - 'warnings_count': w._active_warnings or 0, + 'warnings_count': warnings_by_worker.get(w.id, 0), }) return rows @@ -2559,20 +2592,24 @@ def _build_report_context(start_date, end_date, project_ids=None, team_ids=None) .values_list('workers__id', 'days') ) + # Adjustment totals per (worker, type) — ONE GROUP BY query instead of + # a per-worker-per-type .aggregate() inside the loop below (which fired + # workers × types separate queries, ~112 on a busy month — audit fix, + # Jun 2026). Same numbers, computed once. + adj_by_worker_type = { + (row['worker_id'], row['type']): row['total'] or Decimal('0.00') + for row in adjustments.values('worker_id', 'type').annotate(total=Sum('amount')) + } + worker_breakdown = [] for wr in worker_records: - w_adjs = adjustments.filter(worker_id=wr['worker__id']) # Per-type amounts for this worker (only for types that exist in the period). # Each `adj_values` entry is {'amount': Decimal, 'is_deductive': bool} # so the template can render "-R 500.00" for deductive types. - adj_values = [] - for adj_type in active_adj_types: - amt = w_adjs.filter(type=adj_type).aggregate( - t=Sum('amount'))['t'] or Decimal('0.00') - adj_values.append({ - 'amount': amt, - 'is_deductive': adj_type in DEDUCTIVE_TYPES, - }) + adj_values = [{ + 'amount': adj_by_worker_type.get((wr['worker__id'], adj_type), Decimal('0.00')), + 'is_deductive': adj_type in DEDUCTIVE_TYPES, + } for adj_type in active_adj_types] worker_breakdown.append({ 'name': wr['worker__name'],