fix: worker batch report 'Total Paid' was inflated by the work-log join (+ kill its N+1s)

_build_worker_report_context computed Sum('payroll_records__amount_paid')
in the SAME .annotate() as counts over work_logs and warnings. Django
joins ALL those relations into one query, so each payroll row was
duplicated once per work-log row — the lifetime Total Paid column on
/workers/report/ (HTML, CSV and PDF) was multiplied by the worker's
log count (3 logs + one R100 payslip displayed R300). The distinct=True
counts were immune, which is why the suite never caught it. Verified by
a controlled experiment before fixing; regression test added.

Restructure: payroll aggregates stay in one annotate (same join — safe);
days-worked, warnings and project names move to three batched GROUP BY
dicts; teams + certificates are prefetched. Also removes the ~4 queries
per worker the loop used to fire (audit findings #8).

Display-only bug — payment processing was never affected.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Konrad du Plessis 2026-06-12 17:56:39 +02:00
parent f0f3938621
commit 25910b2861
2 changed files with 98 additions and 26 deletions

View File

@ -3901,3 +3901,38 @@ class WorkerReportBadParamsTests(TestCase):
def test_csv_non_numeric_team_falls_back(self):
resp = self.client.get(reverse('worker_batch_report_csv'), {'team': 'abc'})
self.assertEqual(resp.status_code, 200)
# =============================================================================
# === AUDIT FINDING (Jun 2026) — WORKER REPORT LIFETIME TOTAL INFLATION ===
# _build_worker_report_context computed Sum('payroll_records__amount_paid')
# in the SAME .annotate() as counts over work_logs and warnings. Django
# builds ONE query with all the joins, so each payroll record row is
# duplicated once per work-log row — the lifetime "Total Paid" column was
# multiplied by the worker's log count (3 logs + one R100 payslip showed
# R300). The distinct=True counts were immune, which is why it hid.
# =============================================================================
class WorkerReportLifetimeTotalsTests(TestCase):
"""total_paid_lifetime must not be multiplied by the work-log join."""
def test_total_paid_not_inflated_by_work_log_joins(self):
from core.views import _build_worker_report_context
admin = User.objects.create_user(username='infl_admin', is_staff=True)
p = Project.objects.create(name='Infl Project')
w = Worker.objects.create(
name='Infl Worker', id_number='INF-1',
monthly_salary=Decimal('4000'))
for d in (1, 2, 3): # 3 logs → 3 join rows
wl = WorkLog.objects.create(
date=datetime.date(2026, 6, d), project=p, supervisor=admin)
wl.workers.add(w)
PayrollRecord.objects.create(
worker=w, amount_paid=Decimal('100.00'),
date=datetime.date(2026, 6, 4)) # ONE payslip of R100
rows = _build_worker_report_context()
row = next(r for r in rows if r['worker'].id == w.id)
self.assertEqual(row['total_paid_lifetime'], Decimal('100.00'))
self.assertEqual(row['days_worked'], 3)
self.assertEqual(row['payslip_count'], 1)

View File

@ -1666,38 +1666,71 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
and warning counts. All aggregates are computed in a single query
via annotate/prefetch to avoid N+1 database hits.
"""
workers = Worker.objects.all()
base = Worker.objects.all()
if status == 'active':
workers = workers.filter(active=True)
base = base.filter(active=True)
elif status == 'inactive':
workers = workers.filter(active=False)
base = base.filter(active=False)
if project_id:
workers = workers.filter(work_logs__project_id=project_id).distinct()
base = base.filter(work_logs__project_id=project_id).distinct()
if team_id:
workers = workers.filter(teams__id=team_id).distinct()
base = base.filter(teams__id=team_id).distinct()
workers = workers.annotate(
_days_worked=Count('work_logs__date', distinct=True),
# === AGGREGATION SAFETY (audit fix, Jun 2026) ===
# All four annotations below ride the SAME payroll_records join, so
# they can't multiply each other. The previous version also folded
# work_logs and warnings counts into this one annotate — Django then
# builds a single query joining ALL the relations, and every payroll
# row is duplicated once per work-log row. Result: the lifetime
# "Total Paid" Sum was multiplied by the worker's log count (the
# distinct=True counts were immune, which is why it went unnoticed).
# work_logs / warnings / projects are aggregated in their own batched
# GROUP BY queries below — same numbers, no shared-join cross product,
# and no per-worker queries inside the loop either.
workers = base.annotate(
_first_payslip_date=Min('payroll_records__date'),
_last_payslip_date=Max('payroll_records__date'),
_total_paid_lifetime=Sum('payroll_records__amount_paid'),
_payslip_count=Count('payroll_records', distinct=True),
_active_warnings=Count('warnings', distinct=True),
).order_by('name')
).prefetch_related('teams', 'certificates').order_by('name')
worker_id_qs = base.values('id')
# Days worked per worker (distinct dates across their work logs)
days_by_worker = dict(
WorkLog.objects.filter(workers__id__in=worker_id_qs)
.values('workers__id')
.annotate(days=Count('date', distinct=True))
.values_list('workers__id', 'days')
)
# Warning count per worker
warnings_by_worker = dict(
WorkerWarning.objects.filter(worker_id__in=worker_id_qs)
.values('worker_id')
.annotate(n=Count('id'))
.values_list('worker_id', 'n')
)
# Distinct project names each worker has appeared on
projects_by_worker = {}
for wid, pname in (
WorkLog.objects.filter(workers__id__in=worker_id_qs)
.values_list('workers__id', 'project__name')
.distinct()
.order_by('project__name')
):
projects_by_worker.setdefault(wid, []).append(pname)
today = timezone.localdate()
thirty_days_out = today + datetime.timedelta(days=30)
rows = []
for w in workers:
projects = list(
Project.objects.filter(work_logs__workers=w).distinct().values_list('name', flat=True)
)
teams = list(w.teams.values_list('name', flat=True))
projects = projects_by_worker.get(w.id, [])
teams = [t.name for t in w.teams.all()] # prefetched — no query
certs = w.certificates.all()
certs_total = certs.count()
certs = list(w.certificates.all()) # prefetched — no query
certs_total = len(certs)
certs_active = 0
certs_expiring = 0
certs_expired = 0
@ -1716,7 +1749,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
'worker': w,
'projects': projects,
'teams': teams,
'days_worked': w._days_worked or 0,
'days_worked': days_by_worker.get(w.id, 0),
'first_payslip_date': w._first_payslip_date,
'last_payslip_date': w._last_payslip_date,
'total_paid_lifetime': w._total_paid_lifetime or Decimal('0.00'),
@ -1725,7 +1758,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
'certs_active': certs_active,
'certs_expiring': certs_expiring,
'certs_expired': certs_expired,
'warnings_count': w._active_warnings or 0,
'warnings_count': warnings_by_worker.get(w.id, 0),
})
return rows
@ -2559,20 +2592,24 @@ def _build_report_context(start_date, end_date, project_ids=None, team_ids=None)
.values_list('workers__id', 'days')
)
# Adjustment totals per (worker, type) — ONE GROUP BY query instead of
# a per-worker-per-type .aggregate() inside the loop below (which fired
# workers × types separate queries, ~112 on a busy month — audit fix,
# Jun 2026). Same numbers, computed once.
adj_by_worker_type = {
(row['worker_id'], row['type']): row['total'] or Decimal('0.00')
for row in adjustments.values('worker_id', 'type').annotate(total=Sum('amount'))
}
worker_breakdown = []
for wr in worker_records:
w_adjs = adjustments.filter(worker_id=wr['worker__id'])
# Per-type amounts for this worker (only for types that exist in the period).
# Each `adj_values` entry is {'amount': Decimal, 'is_deductive': bool}
# so the template can render "-R 500.00" for deductive types.
adj_values = []
for adj_type in active_adj_types:
amt = w_adjs.filter(type=adj_type).aggregate(
t=Sum('amount'))['t'] or Decimal('0.00')
adj_values.append({
'amount': amt,
'is_deductive': adj_type in DEDUCTIVE_TYPES,
})
adj_values = [{
'amount': adj_by_worker_type.get((wr['worker__id'], adj_type), Decimal('0.00')),
'is_deductive': adj_type in DEDUCTIVE_TYPES,
} for adj_type in active_adj_types]
worker_breakdown.append({
'name': wr['worker__name'],