Fix: filtered payroll report inflates worker totals by N^2
Reported: when the generate-report page is filtered by BOTH project and
team, every amount in the "Worker Breakdown" and "Payments by Date"
tables blew up by ~100x. Example: Billy Baloyi R 5,400 (correct)
became R 604,800 (wrong, 112x) after selecting Wilkot + Civils One.
Root cause:
_build_report_context chained `records.filter(work_logs__project_id=X)
.distinct().filter(work_logs__team_id=Y).distinct()`. In Django's ORM
each chained M2M filter creates a SEPARATE JOIN alias on
core_payrollrecord_work_logs, so the SQL produces the cartesian product
of (matching-logs-for-project) x (matching-logs-for-team) rows per
PayrollRecord. A downstream `.values().annotate(Sum('amount_paid'))`
then summed across those duplicated rows - inflating every total by
N * M where N and M are the log counts per record.
Why total_paid_out looked correct: `.aggregate(Sum(...))` wraps the
query in a subquery when distinct() is in play, so it dedupes before
summing. `.values().annotate(Sum(...))` uses GROUP BY on the raw
joined rows and doesn't get that help.
Fix:
Replace chained M2M filters with id__in subquery filters:
records.filter(id__in=PayrollRecord.objects.filter(
work_logs__project_id=X).values('id'))
This keeps the outer queryset JOIN-free, so values().annotate(Sum())
aggregates over distinct records. Same pattern applied to the
adjustments team-filter (worker__teams M2M) for the adjustment
summary.
Tests: 5 new regression tests in ReportContextFilterInflationTests
covering project-only, team-only, both-filters, total_paid_out
invariant, and the adjustment summary path. All 24 tests pass
(19 existing + 5 new).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6d37d1ba9b
commit
f1e246ce24
106
core/tests.py
106
core/tests.py
@ -10,7 +10,7 @@ from django.test import TestCase
|
||||
from django.urls import reverse
|
||||
|
||||
from core.models import Project, Team, Worker, WorkLog, PayrollRecord, PayrollAdjustment
|
||||
from core.views import _build_work_log_payroll_context
|
||||
from core.views import _build_work_log_payroll_context, _build_report_context
|
||||
|
||||
|
||||
class WorkLogPayrollContextTests(TestCase):
|
||||
@ -312,3 +312,107 @@ class WorkLogPayrollDetailTests(TestCase):
|
||||
url = reverse('work_log_payroll_detail', args=[self.log.id])
|
||||
resp = self.client.get(url)
|
||||
self.assertEqual(resp.status_code, 403)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# === TESTS FOR PAYROLL REPORT FILTER INFLATION ===
|
||||
# Regression tests for the M2M double-JOIN bug in _build_report_context.
|
||||
#
|
||||
# THE BUG (fixed 2026-04-23):
|
||||
# Filtering a report by project AND team via chained `.filter(work_logs__field=X)`
|
||||
# calls produced TWO separate JOIN aliases on the core_payrollrecord_work_logs
|
||||
# M2M table. Any downstream `.values().annotate(Sum(...))` then aggregated
|
||||
# across the cartesian product of matching rows, inflating every per-worker
|
||||
# total_paid and every payments_by_date amount by N² (where N = number of
|
||||
# matching work logs per record). Total_paid_out itself was correct because
|
||||
# `.aggregate(Sum(...))` wraps distinct() in a subquery, but the per-row
|
||||
# values/annotate pattern doesn't get that help.
|
||||
#
|
||||
# These tests lock down the fix: with filters applied, the worker-level and
|
||||
# date-level totals must equal the real payment amount, not a multiplied one.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ReportContextFilterInflationTests(TestCase):
|
||||
"""Report aggregates must not inflate when project + team filters combine."""
|
||||
|
||||
def setUp(self):
|
||||
self.admin = User.objects.create_user(username='admin-r', is_staff=True)
|
||||
self.project = Project.objects.create(name='Solar Farm Gamma')
|
||||
self.team = Team.objects.create(name='Team Gamma', supervisor=self.admin)
|
||||
self.worker = Worker.objects.create(
|
||||
name='Test Worker', id_number='TW1', monthly_salary=Decimal('4000')
|
||||
)
|
||||
# Worker must be in the team's M2M for the adjustment-summary test
|
||||
# to find them via worker__teams__id=team_id. In real data this is
|
||||
# the standard setup — workers belong to a team.
|
||||
self.team.workers.add(self.worker)
|
||||
# Three work logs in the range, all in the same project + team.
|
||||
# This is the minimum setup that reproduces the N² inflation: with
|
||||
# one payroll record linked to 3 logs, the double-JOIN produces 9 rows.
|
||||
self.logs = []
|
||||
for day in (5, 10, 15):
|
||||
log = WorkLog.objects.create(
|
||||
date=datetime.date(2026, 3, day),
|
||||
project=self.project,
|
||||
team=self.team,
|
||||
supervisor=self.admin,
|
||||
)
|
||||
log.workers.add(self.worker)
|
||||
self.logs.append(log)
|
||||
# One payment covering all 3 logs.
|
||||
self.record = PayrollRecord.objects.create(
|
||||
worker=self.worker,
|
||||
amount_paid=Decimal('600.00'),
|
||||
date=datetime.date(2026, 3, 20),
|
||||
)
|
||||
self.record.work_logs.add(*self.logs)
|
||||
|
||||
def _ctx(self, project_id=None, team_id=None):
|
||||
return _build_report_context(
|
||||
datetime.date(2026, 3, 1),
|
||||
datetime.date(2026, 3, 31),
|
||||
project_id=project_id,
|
||||
team_id=team_id,
|
||||
)
|
||||
|
||||
def test_worker_breakdown_not_inflated_with_project_filter_only(self):
|
||||
ctx = self._ctx(project_id=self.project.id)
|
||||
self.assertEqual(len(ctx['worker_breakdown']), 1)
|
||||
# Pre-fix: this was 600 × 3 = 1800 (one JOIN, 3-way inflation).
|
||||
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
|
||||
|
||||
def test_worker_breakdown_not_inflated_with_both_filters(self):
|
||||
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||
self.assertEqual(len(ctx['worker_breakdown']), 1)
|
||||
# Pre-fix: this was 600 × 3 × 3 = 5400 (two JOINs, 9-way inflation).
|
||||
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
|
||||
|
||||
def test_payments_by_date_not_inflated_with_both_filters(self):
|
||||
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||
payments = list(ctx['payments_by_date'])
|
||||
self.assertEqual(len(payments), 1)
|
||||
self.assertEqual(payments[0]['total'], Decimal('600.00'))
|
||||
|
||||
def test_total_paid_out_stays_correct_with_both_filters(self):
|
||||
"""Regression guard: total_paid_out was ALREADY correct pre-fix
|
||||
because .aggregate() handles distinct() via a subquery. Lock it in
|
||||
so a future refactor doesn't accidentally reintroduce inflation here."""
|
||||
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||
self.assertEqual(ctx['total_paid_out'], Decimal('600.00'))
|
||||
|
||||
def test_adjustment_summary_not_inflated_with_team_filter(self):
|
||||
"""Adjustments filtered by team go through worker__teams (M2M) — same
|
||||
bug class. values().annotate(Sum()) would inflate if the worker is in
|
||||
multiple teams or if the JOIN is chained with other M2M filters."""
|
||||
PayrollAdjustment.objects.create(
|
||||
worker=self.worker,
|
||||
project=self.project,
|
||||
type='Bonus',
|
||||
amount=Decimal('100.00'),
|
||||
date=datetime.date(2026, 3, 10),
|
||||
description='Test bonus',
|
||||
)
|
||||
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||
totals = {item['type']: item['total'] for item in ctx['adjustment_totals']}
|
||||
self.assertEqual(totals.get('Bonus'), Decimal('100.00'))
|
||||
|
||||
@ -1878,11 +1878,30 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
|
||||
date_filter = Q(date__gte=start_date, date__lte=end_date)
|
||||
|
||||
# --- PayrollRecords in range ---
|
||||
#
|
||||
# IMPORTANT — avoid M2M double-JOIN inflation:
|
||||
# Chaining `.filter(work_logs__project_id=X).distinct().filter(work_logs__team_id=Y)`
|
||||
# creates TWO separate JOIN aliases on core_payrollrecord_work_logs. Any
|
||||
# later `.values().annotate(Sum())` then aggregates across the cartesian
|
||||
# product of matching rows, inflating per-worker and per-date totals by
|
||||
# N × M (where N and M are the counts of matching logs per record).
|
||||
# `.aggregate(Sum())` is safe because Django wraps it in a subquery when
|
||||
# distinct() is in play, but `.values().annotate(Sum())` isn't — so we
|
||||
# use id__in subqueries to keep the outer queryset JOIN-free.
|
||||
# See ReportContextFilterInflationTests for regression coverage.
|
||||
records = PayrollRecord.objects.filter(date_filter)
|
||||
if project_id:
|
||||
records = records.filter(work_logs__project_id=project_id).distinct()
|
||||
records = records.filter(
|
||||
id__in=PayrollRecord.objects.filter(
|
||||
work_logs__project_id=project_id
|
||||
).values('id')
|
||||
)
|
||||
if team_id:
|
||||
records = records.filter(work_logs__team_id=team_id).distinct()
|
||||
records = records.filter(
|
||||
id__in=PayrollRecord.objects.filter(
|
||||
work_logs__team_id=team_id
|
||||
).values('id')
|
||||
)
|
||||
|
||||
# --- Total Paid Out (sum of all payments made) ---
|
||||
total_paid_out = records.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0.00')
|
||||
@ -1895,11 +1914,16 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
|
||||
)
|
||||
|
||||
# --- Adjustments in range ---
|
||||
# project_id filters via an FK column (no JOIN inflation risk), but
|
||||
# team_id goes through worker__teams M2M — apply the same subquery
|
||||
# pattern as above to keep adj_by_type's values().annotate(Sum()) safe.
|
||||
adjustments = PayrollAdjustment.objects.filter(date_filter)
|
||||
if project_id:
|
||||
adjustments = adjustments.filter(project_id=project_id)
|
||||
if team_id:
|
||||
adjustments = adjustments.filter(worker__teams__id=team_id).distinct()
|
||||
adjustments = adjustments.filter(
|
||||
worker__in=Worker.objects.filter(teams__id=team_id).values('id')
|
||||
)
|
||||
|
||||
# --- Work Logs in range (for calculating actual labour cost) ---
|
||||
work_logs_qs = WorkLog.objects.filter(date__gte=start_date, date__lte=end_date)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user