fix: worker batch report 'Total Paid' was inflated by the work-log join (+ kill its N+1s)
_build_worker_report_context computed Sum('payroll_records__amount_paid')
in the SAME .annotate() as counts over work_logs and warnings. Django
joins ALL those relations into one query, so each payroll row was
duplicated once per work-log row — the lifetime Total Paid column on
/workers/report/ (HTML, CSV and PDF) was multiplied by the worker's
log count (3 logs + one R100 payslip displayed R300). The distinct=True
counts were immune, which is why the suite never caught it. Verified by
a controlled experiment before fixing; regression test added.
Restructure: payroll aggregates stay in one annotate (same join — safe);
days-worked, warnings and project names move to three batched GROUP BY
dicts; teams + certificates are prefetched. Also removes the ~4 queries
per worker the loop used to fire (audit findings #8).
Display-only bug — payment processing was never affected.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
f0f3938621
commit
25910b2861
@ -3901,3 +3901,38 @@ class WorkerReportBadParamsTests(TestCase):
|
||||
def test_csv_non_numeric_team_falls_back(self):
|
||||
resp = self.client.get(reverse('worker_batch_report_csv'), {'team': 'abc'})
|
||||
self.assertEqual(resp.status_code, 200)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# === AUDIT FINDING (Jun 2026) — WORKER REPORT LIFETIME TOTAL INFLATION ===
|
||||
# _build_worker_report_context computed Sum('payroll_records__amount_paid')
|
||||
# in the SAME .annotate() as counts over work_logs and warnings. Django
|
||||
# builds ONE query with all the joins, so each payroll record row is
|
||||
# duplicated once per work-log row — the lifetime "Total Paid" column was
|
||||
# multiplied by the worker's log count (3 logs + one R100 payslip showed
|
||||
# R300). The distinct=True counts were immune, which is why it hid.
|
||||
# =============================================================================
|
||||
|
||||
class WorkerReportLifetimeTotalsTests(TestCase):
|
||||
"""total_paid_lifetime must not be multiplied by the work-log join."""
|
||||
|
||||
def test_total_paid_not_inflated_by_work_log_joins(self):
|
||||
from core.views import _build_worker_report_context
|
||||
admin = User.objects.create_user(username='infl_admin', is_staff=True)
|
||||
p = Project.objects.create(name='Infl Project')
|
||||
w = Worker.objects.create(
|
||||
name='Infl Worker', id_number='INF-1',
|
||||
monthly_salary=Decimal('4000'))
|
||||
for d in (1, 2, 3): # 3 logs → 3 join rows
|
||||
wl = WorkLog.objects.create(
|
||||
date=datetime.date(2026, 6, d), project=p, supervisor=admin)
|
||||
wl.workers.add(w)
|
||||
PayrollRecord.objects.create(
|
||||
worker=w, amount_paid=Decimal('100.00'),
|
||||
date=datetime.date(2026, 6, 4)) # ONE payslip of R100
|
||||
|
||||
rows = _build_worker_report_context()
|
||||
row = next(r for r in rows if r['worker'].id == w.id)
|
||||
self.assertEqual(row['total_paid_lifetime'], Decimal('100.00'))
|
||||
self.assertEqual(row['days_worked'], 3)
|
||||
self.assertEqual(row['payslip_count'], 1)
|
||||
|
||||
@ -1666,38 +1666,71 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
|
||||
and warning counts. All aggregates are computed in a single query
|
||||
via annotate/prefetch to avoid N+1 database hits.
|
||||
"""
|
||||
workers = Worker.objects.all()
|
||||
base = Worker.objects.all()
|
||||
if status == 'active':
|
||||
workers = workers.filter(active=True)
|
||||
base = base.filter(active=True)
|
||||
elif status == 'inactive':
|
||||
workers = workers.filter(active=False)
|
||||
base = base.filter(active=False)
|
||||
|
||||
if project_id:
|
||||
workers = workers.filter(work_logs__project_id=project_id).distinct()
|
||||
base = base.filter(work_logs__project_id=project_id).distinct()
|
||||
if team_id:
|
||||
workers = workers.filter(teams__id=team_id).distinct()
|
||||
base = base.filter(teams__id=team_id).distinct()
|
||||
|
||||
workers = workers.annotate(
|
||||
_days_worked=Count('work_logs__date', distinct=True),
|
||||
# === AGGREGATION SAFETY (audit fix, Jun 2026) ===
|
||||
# All four annotations below ride the SAME payroll_records join, so
|
||||
# they can't multiply each other. The previous version also folded
|
||||
# work_logs and warnings counts into this one annotate — Django then
|
||||
# builds a single query joining ALL the relations, and every payroll
|
||||
# row is duplicated once per work-log row. Result: the lifetime
|
||||
# "Total Paid" Sum was multiplied by the worker's log count (the
|
||||
# distinct=True counts were immune, which is why it went unnoticed).
|
||||
# work_logs / warnings / projects are aggregated in their own batched
|
||||
# GROUP BY queries below — same numbers, no shared-join cross product,
|
||||
# and no per-worker queries inside the loop either.
|
||||
workers = base.annotate(
|
||||
_first_payslip_date=Min('payroll_records__date'),
|
||||
_last_payslip_date=Max('payroll_records__date'),
|
||||
_total_paid_lifetime=Sum('payroll_records__amount_paid'),
|
||||
_payslip_count=Count('payroll_records', distinct=True),
|
||||
_active_warnings=Count('warnings', distinct=True),
|
||||
).order_by('name')
|
||||
).prefetch_related('teams', 'certificates').order_by('name')
|
||||
|
||||
worker_id_qs = base.values('id')
|
||||
|
||||
# Days worked per worker (distinct dates across their work logs)
|
||||
days_by_worker = dict(
|
||||
WorkLog.objects.filter(workers__id__in=worker_id_qs)
|
||||
.values('workers__id')
|
||||
.annotate(days=Count('date', distinct=True))
|
||||
.values_list('workers__id', 'days')
|
||||
)
|
||||
# Warning count per worker
|
||||
warnings_by_worker = dict(
|
||||
WorkerWarning.objects.filter(worker_id__in=worker_id_qs)
|
||||
.values('worker_id')
|
||||
.annotate(n=Count('id'))
|
||||
.values_list('worker_id', 'n')
|
||||
)
|
||||
# Distinct project names each worker has appeared on
|
||||
projects_by_worker = {}
|
||||
for wid, pname in (
|
||||
WorkLog.objects.filter(workers__id__in=worker_id_qs)
|
||||
.values_list('workers__id', 'project__name')
|
||||
.distinct()
|
||||
.order_by('project__name')
|
||||
):
|
||||
projects_by_worker.setdefault(wid, []).append(pname)
|
||||
|
||||
today = timezone.localdate()
|
||||
thirty_days_out = today + datetime.timedelta(days=30)
|
||||
|
||||
rows = []
|
||||
for w in workers:
|
||||
projects = list(
|
||||
Project.objects.filter(work_logs__workers=w).distinct().values_list('name', flat=True)
|
||||
)
|
||||
teams = list(w.teams.values_list('name', flat=True))
|
||||
projects = projects_by_worker.get(w.id, [])
|
||||
teams = [t.name for t in w.teams.all()] # prefetched — no query
|
||||
|
||||
certs = w.certificates.all()
|
||||
certs_total = certs.count()
|
||||
certs = list(w.certificates.all()) # prefetched — no query
|
||||
certs_total = len(certs)
|
||||
certs_active = 0
|
||||
certs_expiring = 0
|
||||
certs_expired = 0
|
||||
@ -1716,7 +1749,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
|
||||
'worker': w,
|
||||
'projects': projects,
|
||||
'teams': teams,
|
||||
'days_worked': w._days_worked or 0,
|
||||
'days_worked': days_by_worker.get(w.id, 0),
|
||||
'first_payslip_date': w._first_payslip_date,
|
||||
'last_payslip_date': w._last_payslip_date,
|
||||
'total_paid_lifetime': w._total_paid_lifetime or Decimal('0.00'),
|
||||
@ -1725,7 +1758,7 @@ def _build_worker_report_context(status=None, project_id=None, team_id=None):
|
||||
'certs_active': certs_active,
|
||||
'certs_expiring': certs_expiring,
|
||||
'certs_expired': certs_expired,
|
||||
'warnings_count': w._active_warnings or 0,
|
||||
'warnings_count': warnings_by_worker.get(w.id, 0),
|
||||
})
|
||||
return rows
|
||||
|
||||
@ -2559,20 +2592,24 @@ def _build_report_context(start_date, end_date, project_ids=None, team_ids=None)
|
||||
.values_list('workers__id', 'days')
|
||||
)
|
||||
|
||||
# Adjustment totals per (worker, type) — ONE GROUP BY query instead of
|
||||
# a per-worker-per-type .aggregate() inside the loop below (which fired
|
||||
# workers × types separate queries, ~112 on a busy month — audit fix,
|
||||
# Jun 2026). Same numbers, computed once.
|
||||
adj_by_worker_type = {
|
||||
(row['worker_id'], row['type']): row['total'] or Decimal('0.00')
|
||||
for row in adjustments.values('worker_id', 'type').annotate(total=Sum('amount'))
|
||||
}
|
||||
|
||||
worker_breakdown = []
|
||||
for wr in worker_records:
|
||||
w_adjs = adjustments.filter(worker_id=wr['worker__id'])
|
||||
# Per-type amounts for this worker (only for types that exist in the period).
|
||||
# Each `adj_values` entry is {'amount': Decimal, 'is_deductive': bool}
|
||||
# so the template can render "-R 500.00" for deductive types.
|
||||
adj_values = []
|
||||
for adj_type in active_adj_types:
|
||||
amt = w_adjs.filter(type=adj_type).aggregate(
|
||||
t=Sum('amount'))['t'] or Decimal('0.00')
|
||||
adj_values.append({
|
||||
'amount': amt,
|
||||
'is_deductive': adj_type in DEDUCTIVE_TYPES,
|
||||
})
|
||||
adj_values = [{
|
||||
'amount': adj_by_worker_type.get((wr['worker__id'], adj_type), Decimal('0.00')),
|
||||
'is_deductive': adj_type in DEDUCTIVE_TYPES,
|
||||
} for adj_type in active_adj_types]
|
||||
|
||||
worker_breakdown.append({
|
||||
'name': wr['worker__name'],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user