RIPLEY
This commit is contained in:
parent
9f1712e031
commit
00fab55ca3
Binary file not shown.
Binary file not shown.
@ -1,3 +1,13 @@
|
||||
from django.contrib import admin
|
||||
from .models import Source, Entity
|
||||
|
||||
# Register your models here.
|
||||
@admin.register(Source)
class SourceAdmin(admin.ModelAdmin):
    """Admin configuration for ``Source`` records."""

    # Fields matched by the changelist search box.
    search_fields = (
        "name",
    )
    # Columns rendered on the changelist page.
    list_display = (
        "name",
        "created_at",
    )
|
||||
|
||||
@admin.register(Entity)
class EntityAdmin(admin.ModelAdmin):
    """Admin configuration for ``Entity`` records."""

    # Fields matched by the changelist search box.
    search_fields = (
        "value",
    )
    # Sidebar filters offered on the changelist page.
    list_filter = (
        "entity_type",
        "source",
        "created_at",
    )
    # Columns rendered on the changelist page.
    list_display = (
        "entity_type",
        "value",
        "source",
        "confidence_score",
        "created_at",
    )
|
||||
40
core/migrations/0001_initial.py
Normal file
40
core/migrations/0001_initial.py
Normal file
@ -0,0 +1,40 @@
|
||||
# Generated by Django 5.2.7 on 2026-03-22 21:58
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema migration: creates the ``Source`` and ``Entity`` models."""

    # Marks this as the first migration of the app.
    initial = True

    # No other migrations need to run before this one.
    dependencies = [
    ]

    operations = [
        # Source must be created first because Entity holds a foreign key to it.
        migrations.CreateModel(
            name='Source',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
                ('description', models.TextField(blank=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
        ),
        migrations.CreateModel(
            name='Entity',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('entity_type', models.CharField(choices=[('PERSON', 'Person'), ('EMAIL', 'Email'), ('USERNAME', 'Username'), ('IP', 'IP Address')], max_length=20)),
                ('value', models.CharField(db_index=True, max_length=255)),
                ('confidence_score', models.FloatField(default=1.0)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                # Deleting a Source cascades to its entities.
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='entities', to='core.source')),
            ],
            options={
                'verbose_name_plural': 'Entities',
                # At most one row per (entity_type, value, source) combination.
                'unique_together': {('entity_type', 'value', 'source')},
            },
        ),
    ]
|
||||
BIN
core/migrations/__pycache__/0001_initial.cpython-311.pyc
Normal file
BIN
core/migrations/__pycache__/0001_initial.cpython-311.pyc
Normal file
Binary file not shown.
@ -1,3 +1,30 @@
|
||||
from django.db import models
|
||||
|
||||
# Create your models here.
|
||||
class Source(models.Model):
    """A named origin of data, identified by its unique ``name``."""

    # Unique name, at most 255 characters.
    name = models.CharField(max_length=255, unique=True)
    # Free-text description; may be left blank.
    description = models.TextField(blank=True)
    # Timestamp set automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self) -> str:
        """Return the source name as the display representation."""
        return self.name
|
||||
|
||||
class Entity(models.Model):
    """A typed value (person, email, username or IP address) attributed to a Source."""

    # (stored value, human-readable label) pairs accepted by ``entity_type``.
    ENTITY_TYPES = (
        ('PERSON', 'Person'),
        ('EMAIL', 'Email'),
        ('USERNAME', 'Username'),
        ('IP', 'IP Address'),
    )
    # Kind of entity; restricted to the ENTITY_TYPES choices.
    entity_type = models.CharField(max_length=20, choices=ENTITY_TYPES)
    # The entity's value, indexed for lookups (at most 255 characters).
    value = models.CharField(max_length=255, db_index=True)
    # Owning source; deleting the Source deletes its entities. Reverse accessor: ``source.entities``.
    source = models.ForeignKey(Source, on_delete=models.CASCADE, related_name='entities')
    # Defaults to 1.0. NOTE(review): presumably a 0.0-1.0 confidence; no range is enforced here.
    confidence_score = models.FloatField(default=1.0)
    # Set automatically on creation.
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed automatically on every save.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        verbose_name_plural = "Entities"
        # At most one row per (entity_type, value, source) combination.
        unique_together = ('entity_type', 'value', 'source')

    def __str__(self) -> str:
        """Return ``"<entity_type>: <value>"`` as the display representation."""
        return f"{self.entity_type}: {self.value}"
|
||||
30
core/services/ingestion.py
Normal file
30
core/services/ingestion.py
Normal file
@ -0,0 +1,30 @@
|
||||
from django.db import transaction
|
||||
from core.models import Source, Entity
|
||||
|
||||
class IngestionService:
    """Loads raw records from a named source into ``Entity`` rows."""

    @staticmethod
    def ingest_data(source_name, entity_type, raw_data):
        """
        Aggregates and normalizes raw data into the system.

        Args:
            source_name: Name of the ``Source`` the record came from. The
                ``Source`` row is created if it does not exist yet.
            entity_type: Entity type to store (see ``Entity.ENTITY_TYPES``).
            raw_data: Mapping with a required ``'identifier'`` key, which is
                stored as ``Entity.value``.

        Returns:
            The created or already existing ``Entity``.

        Raises:
            ValueError: If ``raw_data`` is empty or has no ``'identifier'``.
        """
        # Validate before opening a transaction so bad input never touches the DB.
        identifier = raw_data.get('identifier') if raw_data else None
        if not identifier:
            raise ValueError("Missing identifier in raw_data")

        with transaction.atomic():
            source, _ = Source.objects.get_or_create(name=source_name)

            # Look up on the model's real fields. Entity is unique per
            # (entity_type, value, source), so all three go into the lookup;
            # leaving ``source`` out could match rows from several sources.
            entity, created = Entity.objects.get_or_create(
                entity_type=entity_type,
                value=identifier,
                source=source,
            )

            if not created:
                # Re-ingestion of a known entity: refresh ``updated_at`` only.
                entity.save(update_fields=['updated_at'])

            # NOTE(review): Entity has no ``metadata`` field, so
            # raw_data['metadata'] is not persisted. Add a field plus a
            # migration if it must be stored.
            return entity
|
||||
24
core/services/resolution.py
Normal file
24
core/services/resolution.py
Normal file
@ -0,0 +1,24 @@
|
||||
from django.db import transaction
|
||||
from core.models import Entity
|
||||
|
||||
class EntityResolutionService:
    """Decides whether two stored identifiers refer to the same person."""

    @staticmethod
    def resolve_identity(identifier_a, identifier_b, probability_threshold=0.8):
        """
        Determines if two identities belong to the same physical person based on statistical probability.

        Args:
            identifier_a: ``Entity.value`` of the first identity.
            identifier_b: ``Entity.value`` of the second identity.
            probability_threshold: Minimum match probability required to
                treat the two identities as the same person.

        Returns:
            True if the match probability reaches the threshold, else False.

        Raises:
            Entity.DoesNotExist: If the threshold is met but either
                identifier has no stored ``Entity``.
        """
        # Placeholder for complex ML/Graph analysis logic.
        match_probability = 0.9  # Mock value

        if match_probability < probability_threshold:
            return False

        with transaction.atomic():
            # ``value`` is the model's field name and is not unique on its own
            # (uniqueness also covers entity_type and source). Check existence
            # with filter() instead of get(), which could raise
            # MultipleObjectsReturned.
            for identifier in (identifier_a, identifier_b):
                if not Entity.objects.filter(value=identifier).exists():
                    raise Entity.DoesNotExist(
                        f"No Entity found with value {identifier!r}"
                    )

            # Logic to merge entities (e.g., link them) goes here.
            # In a graph db, we would add a relationship.
            # In Django, we might link via a 'resolved_to' field if existing.

        return True
|
||||
Loading…
x
Reference in New Issue
Block a user