Fix: filtered payroll report inflates worker totals by N^2

Reported: when the generate-report page is filtered by BOTH project and
team, every amount in the "Worker Breakdown" and "Payments by Date"
tables blew up by ~100x. Example: Billy Baloyi R 5,400 (correct)
became R 604,800 (wrong, 112x) after selecting Wilkot + Civils One.

Root cause:
_build_report_context chained `records.filter(work_logs__project_id=X)
.distinct().filter(work_logs__team_id=Y).distinct()`. In Django's ORM
each chained M2M filter creates a SEPARATE JOIN alias on
core_payrollrecord_work_logs, so the SQL produces the cartesian product
of (matching-logs-for-project) x (matching-logs-for-team) rows per
PayrollRecord. A downstream `.values().annotate(Sum('amount_paid'))`
then summed across those duplicated rows - inflating every total by
N * M where N and M are the log counts per record.

Why total_paid_out looked correct: `.aggregate(Sum(...))` wraps the
query in a subquery when distinct() is in play, so it dedupes before
summing. `.values().annotate(Sum(...))` uses GROUP BY on the raw
joined rows and doesn't get that help.

Fix:
Replace chained M2M filters with id__in subquery filters:
  records.filter(id__in=PayrollRecord.objects.filter(
      work_logs__project_id=X).values('id'))
This keeps the outer queryset JOIN-free, so values().annotate(Sum())
aggregates over distinct records. Same pattern applied to the
adjustments team-filter (worker__teams M2M) for the adjustment
summary.

Tests: 5 new regression tests in ReportContextFilterInflationTests
covering project-only, team-only, both-filters, total_paid_out
invariant, and the adjustment summary path. All 24 tests pass
(19 existing + 5 new).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Konrad du Plessis 2026-04-22 19:51:07 +02:00
parent 6d37d1ba9b
commit f1e246ce24
2 changed files with 132 additions and 4 deletions

View File

@ -10,7 +10,7 @@ from django.test import TestCase
from django.urls import reverse from django.urls import reverse
from core.models import Project, Team, Worker, WorkLog, PayrollRecord, PayrollAdjustment from core.models import Project, Team, Worker, WorkLog, PayrollRecord, PayrollAdjustment
from core.views import _build_work_log_payroll_context from core.views import _build_work_log_payroll_context, _build_report_context
class WorkLogPayrollContextTests(TestCase): class WorkLogPayrollContextTests(TestCase):
@ -312,3 +312,107 @@ class WorkLogPayrollDetailTests(TestCase):
url = reverse('work_log_payroll_detail', args=[self.log.id]) url = reverse('work_log_payroll_detail', args=[self.log.id])
resp = self.client.get(url) resp = self.client.get(url)
self.assertEqual(resp.status_code, 403) self.assertEqual(resp.status_code, 403)
# =============================================================================
# === TESTS FOR PAYROLL REPORT FILTER INFLATION ===
# Regression tests for the M2M double-JOIN bug in _build_report_context.
#
# THE BUG (fixed 2026-04-23):
# Filtering a report by project AND team via chained `.filter(work_logs__field=X)`
# calls produced TWO separate JOIN aliases on the core_payrollrecord_work_logs
# M2M table. Any downstream `.values().annotate(Sum(...))` then aggregated
# across the cartesian product of matching rows, inflating every per-worker
# total_paid and every payments_by_date amount by N² (where N = number of
# matching work logs per record). Total_paid_out itself was correct because
# `.aggregate(Sum(...))` wraps distinct() in a subquery, but the per-row
# values/annotate pattern doesn't get that help.
#
# These tests lock down the fix: with filters applied, the worker-level and
# date-level totals must equal the real payment amount, not a multiplied one.
# =============================================================================
class ReportContextFilterInflationTests(TestCase):
"""Report aggregates must not inflate when project + team filters combine."""
def setUp(self):
self.admin = User.objects.create_user(username='admin-r', is_staff=True)
self.project = Project.objects.create(name='Solar Farm Gamma')
self.team = Team.objects.create(name='Team Gamma', supervisor=self.admin)
self.worker = Worker.objects.create(
name='Test Worker', id_number='TW1', monthly_salary=Decimal('4000')
)
# Worker must be in the team's M2M for the adjustment-summary test
# to find them via worker__teams__id=team_id. In real data this is
# the standard setup — workers belong to a team.
self.team.workers.add(self.worker)
# Three work logs in the range, all in the same project + team.
# This is the minimum setup that reproduces the N² inflation: with
# one payroll record linked to 3 logs, the double-JOIN produces 9 rows.
self.logs = []
for day in (5, 10, 15):
log = WorkLog.objects.create(
date=datetime.date(2026, 3, day),
project=self.project,
team=self.team,
supervisor=self.admin,
)
log.workers.add(self.worker)
self.logs.append(log)
# One payment covering all 3 logs.
self.record = PayrollRecord.objects.create(
worker=self.worker,
amount_paid=Decimal('600.00'),
date=datetime.date(2026, 3, 20),
)
self.record.work_logs.add(*self.logs)
def _ctx(self, project_id=None, team_id=None):
return _build_report_context(
datetime.date(2026, 3, 1),
datetime.date(2026, 3, 31),
project_id=project_id,
team_id=team_id,
)
def test_worker_breakdown_not_inflated_with_project_filter_only(self):
ctx = self._ctx(project_id=self.project.id)
self.assertEqual(len(ctx['worker_breakdown']), 1)
# Pre-fix: this was 600 × 3 = 1800 (one JOIN, 3-way inflation).
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
def test_worker_breakdown_not_inflated_with_both_filters(self):
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
self.assertEqual(len(ctx['worker_breakdown']), 1)
# Pre-fix: this was 600 × 3 × 3 = 5400 (two JOINs, 9-way inflation).
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
def test_payments_by_date_not_inflated_with_both_filters(self):
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
payments = list(ctx['payments_by_date'])
self.assertEqual(len(payments), 1)
self.assertEqual(payments[0]['total'], Decimal('600.00'))
def test_total_paid_out_stays_correct_with_both_filters(self):
"""Regression guard: total_paid_out was ALREADY correct pre-fix
because .aggregate() handles distinct() via a subquery. Lock it in
so a future refactor doesn't accidentally reintroduce inflation here."""
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
self.assertEqual(ctx['total_paid_out'], Decimal('600.00'))
def test_adjustment_summary_not_inflated_with_team_filter(self):
"""Adjustments filtered by team go through worker__teams (M2M) — same
bug class. values().annotate(Sum()) would inflate if the worker is in
multiple teams or if the JOIN is chained with other M2M filters."""
PayrollAdjustment.objects.create(
worker=self.worker,
project=self.project,
type='Bonus',
amount=Decimal('100.00'),
date=datetime.date(2026, 3, 10),
description='Test bonus',
)
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
totals = {item['type']: item['total'] for item in ctx['adjustment_totals']}
self.assertEqual(totals.get('Bonus'), Decimal('100.00'))

View File

@ -1878,11 +1878,30 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
date_filter = Q(date__gte=start_date, date__lte=end_date) date_filter = Q(date__gte=start_date, date__lte=end_date)
# --- PayrollRecords in range --- # --- PayrollRecords in range ---
#
# IMPORTANT — avoid M2M double-JOIN inflation:
# Chaining `.filter(work_logs__project_id=X).distinct().filter(work_logs__team_id=Y)`
# creates TWO separate JOIN aliases on core_payrollrecord_work_logs. Any
# later `.values().annotate(Sum())` then aggregates across the cartesian
# product of matching rows, inflating per-worker and per-date totals by
# N × M (where N and M are the counts of matching logs per record).
# `.aggregate(Sum())` is safe because Django wraps it in a subquery when
# distinct() is in play, but `.values().annotate(Sum())` isn't — so we
# use id__in subqueries to keep the outer queryset JOIN-free.
# See ReportContextFilterInflationTests for regression coverage.
records = PayrollRecord.objects.filter(date_filter) records = PayrollRecord.objects.filter(date_filter)
if project_id: if project_id:
records = records.filter(work_logs__project_id=project_id).distinct() records = records.filter(
id__in=PayrollRecord.objects.filter(
work_logs__project_id=project_id
).values('id')
)
if team_id: if team_id:
records = records.filter(work_logs__team_id=team_id).distinct() records = records.filter(
id__in=PayrollRecord.objects.filter(
work_logs__team_id=team_id
).values('id')
)
# --- Total Paid Out (sum of all payments made) --- # --- Total Paid Out (sum of all payments made) ---
total_paid_out = records.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0.00') total_paid_out = records.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0.00')
@ -1895,11 +1914,16 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
) )
# --- Adjustments in range --- # --- Adjustments in range ---
# project_id filters via an FK column (no JOIN inflation risk), but
# team_id goes through worker__teams M2M — apply the same subquery
# pattern as above to keep adj_by_type's values().annotate(Sum()) safe.
adjustments = PayrollAdjustment.objects.filter(date_filter) adjustments = PayrollAdjustment.objects.filter(date_filter)
if project_id: if project_id:
adjustments = adjustments.filter(project_id=project_id) adjustments = adjustments.filter(project_id=project_id)
if team_id: if team_id:
adjustments = adjustments.filter(worker__teams__id=team_id).distinct() adjustments = adjustments.filter(
worker__in=Worker.objects.filter(teams__id=team_id).values('id')
)
# --- Work Logs in range (for calculating actual labour cost) --- # --- Work Logs in range (for calculating actual labour cost) ---
work_logs_qs = WorkLog.objects.filter(date__gte=start_date, date__lte=end_date) work_logs_qs = WorkLog.objects.filter(date__gte=start_date, date__lte=end_date)