Fix: filtered payroll report inflates worker totals by N^2
Reported: when the generate-report page is filtered by BOTH project and
team, every amount in the "Worker Breakdown" and "Payments by Date"
tables blew up by ~100x. Example: Billy Baloyi R 5,400 (correct)
became R 604,800 (wrong, 112x) after selecting Wilkot + Civils One.
Root cause:
_build_report_context chained `records.filter(work_logs__project_id=X)
.distinct().filter(work_logs__team_id=Y).distinct()`. In Django's ORM
each chained M2M filter creates a SEPARATE JOIN alias on
core_payrollrecord_work_logs, so the SQL produces the cartesian product
of (matching-logs-for-project) x (matching-logs-for-team) rows per
PayrollRecord. A downstream `.values().annotate(Sum('amount_paid'))`
then summed across those duplicated rows - inflating every total by
N * M where N and M are the log counts per record.
Why total_paid_out looked correct: `.aggregate(Sum(...))` wraps the
query in a subquery when distinct() is in play, so it dedupes before
summing. `.values().annotate(Sum(...))` uses GROUP BY on the raw
joined rows and doesn't get that help.
Fix:
Replace chained M2M filters with id__in subquery filters:
records.filter(id__in=PayrollRecord.objects.filter(
work_logs__project_id=X).values('id'))
This keeps the outer queryset JOIN-free, so values().annotate(Sum())
aggregates over distinct records. Same pattern applied to the
adjustments team-filter (worker__teams M2M) for the adjustment
summary.
Tests: 5 new regression tests in ReportContextFilterInflationTests
covering project-only, team-only, both-filters, total_paid_out
invariant, and the adjustment summary path. All 24 tests pass
(19 existing + 5 new).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6d37d1ba9b
commit
f1e246ce24
106
core/tests.py
106
core/tests.py
@ -10,7 +10,7 @@ from django.test import TestCase
|
|||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
from core.models import Project, Team, Worker, WorkLog, PayrollRecord, PayrollAdjustment
|
from core.models import Project, Team, Worker, WorkLog, PayrollRecord, PayrollAdjustment
|
||||||
from core.views import _build_work_log_payroll_context
|
from core.views import _build_work_log_payroll_context, _build_report_context
|
||||||
|
|
||||||
|
|
||||||
class WorkLogPayrollContextTests(TestCase):
|
class WorkLogPayrollContextTests(TestCase):
|
||||||
@ -312,3 +312,107 @@ class WorkLogPayrollDetailTests(TestCase):
|
|||||||
url = reverse('work_log_payroll_detail', args=[self.log.id])
|
url = reverse('work_log_payroll_detail', args=[self.log.id])
|
||||||
resp = self.client.get(url)
|
resp = self.client.get(url)
|
||||||
self.assertEqual(resp.status_code, 403)
|
self.assertEqual(resp.status_code, 403)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# === TESTS FOR PAYROLL REPORT FILTER INFLATION ===
|
||||||
|
# Regression tests for the M2M double-JOIN bug in _build_report_context.
|
||||||
|
#
|
||||||
|
# THE BUG (fixed 2026-04-23):
|
||||||
|
# Filtering a report by project AND team via chained `.filter(work_logs__field=X)`
|
||||||
|
# calls produced TWO separate JOIN aliases on the core_payrollrecord_work_logs
|
||||||
|
# M2M table. Any downstream `.values().annotate(Sum(...))` then aggregated
|
||||||
|
# across the cartesian product of matching rows, inflating every per-worker
|
||||||
|
# total_paid and every payments_by_date amount by N² (where N = number of
|
||||||
|
# matching work logs per record). Total_paid_out itself was correct because
|
||||||
|
# `.aggregate(Sum(...))` wraps distinct() in a subquery, but the per-row
|
||||||
|
# values/annotate pattern doesn't get that help.
|
||||||
|
#
|
||||||
|
# These tests lock down the fix: with filters applied, the worker-level and
|
||||||
|
# date-level totals must equal the real payment amount, not a multiplied one.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class ReportContextFilterInflationTests(TestCase):
|
||||||
|
"""Report aggregates must not inflate when project + team filters combine."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.admin = User.objects.create_user(username='admin-r', is_staff=True)
|
||||||
|
self.project = Project.objects.create(name='Solar Farm Gamma')
|
||||||
|
self.team = Team.objects.create(name='Team Gamma', supervisor=self.admin)
|
||||||
|
self.worker = Worker.objects.create(
|
||||||
|
name='Test Worker', id_number='TW1', monthly_salary=Decimal('4000')
|
||||||
|
)
|
||||||
|
# Worker must be in the team's M2M for the adjustment-summary test
|
||||||
|
# to find them via worker__teams__id=team_id. In real data this is
|
||||||
|
# the standard setup — workers belong to a team.
|
||||||
|
self.team.workers.add(self.worker)
|
||||||
|
# Three work logs in the range, all in the same project + team.
|
||||||
|
# This is the minimum setup that reproduces the N² inflation: with
|
||||||
|
# one payroll record linked to 3 logs, the double-JOIN produces 9 rows.
|
||||||
|
self.logs = []
|
||||||
|
for day in (5, 10, 15):
|
||||||
|
log = WorkLog.objects.create(
|
||||||
|
date=datetime.date(2026, 3, day),
|
||||||
|
project=self.project,
|
||||||
|
team=self.team,
|
||||||
|
supervisor=self.admin,
|
||||||
|
)
|
||||||
|
log.workers.add(self.worker)
|
||||||
|
self.logs.append(log)
|
||||||
|
# One payment covering all 3 logs.
|
||||||
|
self.record = PayrollRecord.objects.create(
|
||||||
|
worker=self.worker,
|
||||||
|
amount_paid=Decimal('600.00'),
|
||||||
|
date=datetime.date(2026, 3, 20),
|
||||||
|
)
|
||||||
|
self.record.work_logs.add(*self.logs)
|
||||||
|
|
||||||
|
def _ctx(self, project_id=None, team_id=None):
|
||||||
|
return _build_report_context(
|
||||||
|
datetime.date(2026, 3, 1),
|
||||||
|
datetime.date(2026, 3, 31),
|
||||||
|
project_id=project_id,
|
||||||
|
team_id=team_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_worker_breakdown_not_inflated_with_project_filter_only(self):
|
||||||
|
ctx = self._ctx(project_id=self.project.id)
|
||||||
|
self.assertEqual(len(ctx['worker_breakdown']), 1)
|
||||||
|
# Pre-fix: this was 600 × 3 = 1800 (one JOIN, 3-way inflation).
|
||||||
|
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
|
||||||
|
|
||||||
|
def test_worker_breakdown_not_inflated_with_both_filters(self):
|
||||||
|
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||||
|
self.assertEqual(len(ctx['worker_breakdown']), 1)
|
||||||
|
# Pre-fix: this was 600 × 3 × 3 = 5400 (two JOINs, 9-way inflation).
|
||||||
|
self.assertEqual(ctx['worker_breakdown'][0]['total_paid'], Decimal('600.00'))
|
||||||
|
|
||||||
|
def test_payments_by_date_not_inflated_with_both_filters(self):
|
||||||
|
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||||
|
payments = list(ctx['payments_by_date'])
|
||||||
|
self.assertEqual(len(payments), 1)
|
||||||
|
self.assertEqual(payments[0]['total'], Decimal('600.00'))
|
||||||
|
|
||||||
|
def test_total_paid_out_stays_correct_with_both_filters(self):
|
||||||
|
"""Regression guard: total_paid_out was ALREADY correct pre-fix
|
||||||
|
because .aggregate() handles distinct() via a subquery. Lock it in
|
||||||
|
so a future refactor doesn't accidentally reintroduce inflation here."""
|
||||||
|
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||||
|
self.assertEqual(ctx['total_paid_out'], Decimal('600.00'))
|
||||||
|
|
||||||
|
def test_adjustment_summary_not_inflated_with_team_filter(self):
|
||||||
|
"""Adjustments filtered by team go through worker__teams (M2M) — same
|
||||||
|
bug class. values().annotate(Sum()) would inflate if the worker is in
|
||||||
|
multiple teams or if the JOIN is chained with other M2M filters."""
|
||||||
|
PayrollAdjustment.objects.create(
|
||||||
|
worker=self.worker,
|
||||||
|
project=self.project,
|
||||||
|
type='Bonus',
|
||||||
|
amount=Decimal('100.00'),
|
||||||
|
date=datetime.date(2026, 3, 10),
|
||||||
|
description='Test bonus',
|
||||||
|
)
|
||||||
|
ctx = self._ctx(project_id=self.project.id, team_id=self.team.id)
|
||||||
|
totals = {item['type']: item['total'] for item in ctx['adjustment_totals']}
|
||||||
|
self.assertEqual(totals.get('Bonus'), Decimal('100.00'))
|
||||||
|
|||||||
@ -1878,11 +1878,30 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
|
|||||||
date_filter = Q(date__gte=start_date, date__lte=end_date)
|
date_filter = Q(date__gte=start_date, date__lte=end_date)
|
||||||
|
|
||||||
# --- PayrollRecords in range ---
|
# --- PayrollRecords in range ---
|
||||||
|
#
|
||||||
|
# IMPORTANT — avoid M2M double-JOIN inflation:
|
||||||
|
# Chaining `.filter(work_logs__project_id=X).distinct().filter(work_logs__team_id=Y)`
|
||||||
|
# creates TWO separate JOIN aliases on core_payrollrecord_work_logs. Any
|
||||||
|
# later `.values().annotate(Sum())` then aggregates across the cartesian
|
||||||
|
# product of matching rows, inflating per-worker and per-date totals by
|
||||||
|
# N × M (where N and M are the counts of matching logs per record).
|
||||||
|
# `.aggregate(Sum())` is safe because Django wraps it in a subquery when
|
||||||
|
# distinct() is in play, but `.values().annotate(Sum())` isn't — so we
|
||||||
|
# use id__in subqueries to keep the outer queryset JOIN-free.
|
||||||
|
# See ReportContextFilterInflationTests for regression coverage.
|
||||||
records = PayrollRecord.objects.filter(date_filter)
|
records = PayrollRecord.objects.filter(date_filter)
|
||||||
if project_id:
|
if project_id:
|
||||||
records = records.filter(work_logs__project_id=project_id).distinct()
|
records = records.filter(
|
||||||
|
id__in=PayrollRecord.objects.filter(
|
||||||
|
work_logs__project_id=project_id
|
||||||
|
).values('id')
|
||||||
|
)
|
||||||
if team_id:
|
if team_id:
|
||||||
records = records.filter(work_logs__team_id=team_id).distinct()
|
records = records.filter(
|
||||||
|
id__in=PayrollRecord.objects.filter(
|
||||||
|
work_logs__team_id=team_id
|
||||||
|
).values('id')
|
||||||
|
)
|
||||||
|
|
||||||
# --- Total Paid Out (sum of all payments made) ---
|
# --- Total Paid Out (sum of all payments made) ---
|
||||||
total_paid_out = records.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0.00')
|
total_paid_out = records.aggregate(total=Sum('amount_paid'))['total'] or Decimal('0.00')
|
||||||
@ -1895,11 +1914,16 @@ def _build_report_context(start_date, end_date, project_id=None, team_id=None):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- Adjustments in range ---
|
# --- Adjustments in range ---
|
||||||
|
# project_id filters via an FK column (no JOIN inflation risk), but
|
||||||
|
# team_id goes through worker__teams M2M — apply the same subquery
|
||||||
|
# pattern as above to keep adj_by_type's values().annotate(Sum()) safe.
|
||||||
adjustments = PayrollAdjustment.objects.filter(date_filter)
|
adjustments = PayrollAdjustment.objects.filter(date_filter)
|
||||||
if project_id:
|
if project_id:
|
||||||
adjustments = adjustments.filter(project_id=project_id)
|
adjustments = adjustments.filter(project_id=project_id)
|
||||||
if team_id:
|
if team_id:
|
||||||
adjustments = adjustments.filter(worker__teams__id=team_id).distinct()
|
adjustments = adjustments.filter(
|
||||||
|
worker__in=Worker.objects.filter(teams__id=team_id).values('id')
|
||||||
|
)
|
||||||
|
|
||||||
# --- Work Logs in range (for calculating actual labour cost) ---
|
# --- Work Logs in range (for calculating actual labour cost) ---
|
||||||
work_logs_qs = WorkLog.objects.filter(date__gte=start_date, date__lte=end_date)
|
work_logs_qs = WorkLog.objects.filter(date__gte=start_date, date__lte=end_date)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user