Ripley

2026-03-22 23:57:03 +00:00 · 2026-03-22 23:57:03 +00:00 · 23199338ec
commit 23199338ec
parent ed62ae0c79
3 changed files with 68 additions and 91 deletions
--- a/core/services/__pycache__/resolution.cpython-311.pyc
+++ b/core/services/__pycache__/resolution.cpython-311.pyc
--- a/core/services/resolution.py
+++ b/core/services/resolution.py
@@ -7,53 +7,39 @@ from urllib.parse import urljoin, quote
 logger = logging.getLogger(__name__)

 class WebCrawler:
-    """
-    Crawler to extract information from the web without relying on APIs.
-    """
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        })

    def fetch_url(self, url):
-        """
-        Fetch URL and extract basic metadata and image links.
-        """
+        """Fetch URL, extract title, meta description, and top images."""
        try:
+            logger.info(f"Crawling page: {url}")
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            
-            # Extract meta tags
-            metadata = {
-                "title": soup.title.string if soup.title else "No title",
-                "description": soup.find("meta", attrs={"name": "description"}),
-            }
-            if metadata["description"]:
-                metadata["description"] = metadata["description"].get("content", "")
-            else:
-                metadata["description"] = ""
+            title = soup.title.string.strip() if soup.title else ""
+            desc_tag = soup.find("meta", attrs={"name": "description"})
+            description = desc_tag.get("content", "").strip() if desc_tag else ""
            
-            # Extract images (top 3)
            images = []
-            for img in soup.find_all("img", limit=3):
+            for img in soup.find_all("img", limit=5):
                src = img.get("src")
-                if src:
-                    images.append(urljoin(url, src))
+                if src and not src.startswith("data:"):
+                    full_src = urljoin(url, src)
+                    images.append(full_src)
            
-            return metadata, images
+            return {"title": title, "description": description}, images
        except Exception as e:
-            logger.error(f"Failed to crawl {url}: {e}")
+            logger.warning(f"Crawling failed for {url}: {e}")
            return None, []

    def search(self, query):
-        """
-        Perform a simulated search on Google using requests.
-        """
+        """Perform a Google search."""
        search_url = f"https://www.google.com/search?q={quote(query)}"
-        logger.info(f"Crawling URL: {search_url}")
-        
        try:
            response = self.session.get(search_url, timeout=10)
            response.raise_for_status()
@@ -65,78 +51,44 @@ class WebCrawler:
                link_elem = g.select_one("a")
                if title_elem and link_elem:
                    url = link_elem.get("href")
-                    # Handle Google's link redirecting
                    if url.startswith("/url?q="):
                        url = url.split("/url?q=")[1].split("&")[0]
-                    results.append({
-                        "title": title_elem.get_text(),
-                        "url": url
-                    })
+                    results.append({"title": title_elem.get_text(), "url": url})
            return results
        except Exception as e:
-            logger.error(f"Search failed: {e}")
+            logger.error(f"Search failed for {query}: {e}")
            return []

 class NetworkDiscoveryService:
    @staticmethod
    def perform_osint_search(query):
-        """
-        Performs discovery using Web Crawling, extracting metadata and images.
-        """
-        try:
-            crawler = WebCrawler()
-            search_results = crawler.search(query)
+        """Perform discovery using Web Crawling, extracting metadata and images."""
+        crawler = WebCrawler()
+        search_results = crawler.search(query)
+        
+        source, _ = Source.objects.get_or_create(name='Web Crawler Engine')
+        person, _ = Entity.objects.get_or_create(entity_type='PERSON', value=query, source=source)
+        
+        # Use first valid image found among search results if available
+        found_photo = None
+        
+        for res in search_results[:3]:  # Limit crawling to top 3
+            meta, images = crawler.fetch_url(res['url'])
            
-            source, _ = Source.objects.get_or_create(name='Web Crawler Engine')
+            if images and not found_photo:
+                found_photo = images[0]
            
-            # 1. Create main entity
-            person, _ = Entity.objects.get_or_create(
-                entity_type='PERSON',
-                value=query,
-                source=source
-            )
-            # Default photo fallback
-            person.photo_url = f"https://api.dicebear.com/7.x/pixel-art/svg?seed={query.replace(' ', '+')}"
-            
-            # 2. Extract potential associates and crawl their pages
-            for res in search_results:
-                metadata, images = crawler.fetch_url(res['url'])
-                
-                # If we found an image on their page, prioritize that for the main person if it's the first result
-                if images and not person.photo_url.startswith("https://api.dicebear.com"):
-                    person.photo_url = images[0]
-                elif images and person.photo_url.startswith("https://api.dicebear.com"):
-                    # For demo purposes, set photo from the first relevant page
-                    person.photo_url = images[0]
-
-                # Create associate
-                associate_val = metadata['title'] if metadata and metadata['title'] != "No title" else res['title'][:50]
-                if associate_val != query:
+            if meta:
+                associate_val = meta['title'] or res['title']
+                if associate_val and associate_val.lower() != query.lower():
                    associate, _ = Entity.objects.get_or_create(
-                        entity_type='PERSON',
-                        value=associate_val,
-                        source=source
+                        entity_type='PERSON', value=associate_val, source=source
                    )
-                    
-                    # Store link/metadata info if you have a field for it
-                    
-                    # 3. Create relationship
                    Relationship.objects.get_or_create(
-                        source_entity=person,
-                        target_entity=associate,
-                        relationship_type='ASSOCIATED_WITH',
-                        weight=0.5
+                        source_entity=person, target_entity=associate, 
+                        relationship_type='ASSOCIATED_WITH', weight=0.5
                    )
-            
-            person.save()
-            return person
-
-        except Exception as e:
-            logger.error(f"Error performing web-based discovery for {query}: {e}")
-            return None
-
-class EntityResolutionService:
-    @staticmethod
-    def resolve_identity(identifier_a, identifier_b, probability_threshold=0.8):
-        # Implementation remains unchanged
-        return True
+        
+        person.photo_url = found_photo or f"https://api.dicebear.com/7.x/initials/svg?seed={quote(query)}"
+        person.save()
+        return person
--- a/core/templates/core/dashboard.html
+++ b/core/templates/core/dashboard.html
@@ -6,6 +6,7 @@
    .node-group { cursor: pointer; }
    .node-circle { stroke: #fff; stroke-width: 2px; }
    .node-text { font-size: 10px; pointer-events: none; }
+    #loader { display: none; }
 </style>

 <div class="container mt-5">
@@ -18,7 +19,12 @@
                    <h5 class="card-title">Network Discovery</h5>
                    <form id="searchForm" class="input-group">
                        <input type="text" id="searchInput" class="form-control" placeholder="Search for a name to map their network...">
-                        <button class="btn btn-primary" type="submit">Discover</button>
+                        <button class="btn btn-primary" id="searchBtn" type="submit">
+                            <span id="btnText">Discover</span>
+                            <div id="loader" class="spinner-border spinner-border-sm" role="status">
+                                <span class="visually-hidden">Loading...</span>
+                            </div>
+                        </button>
                    </form>
                </div>
            </div>
@@ -43,13 +49,32 @@ document.getElementById('searchForm').addEventListener('submit', function(e) {
    e.preventDefault();
    const query = document.getElementById('searchInput').value;
    const graphContainer = d3.select("#graphContainer");
-    graphContainer.html('<p class="p-3">Discovering network...</p>');
+    const searchBtn = document.getElementById('searchBtn');
+    const btnText = document.getElementById('btnText');
+    const loader = document.getElementById('loader');
+
+    // UI Loading state
+    searchBtn.disabled = true;
+    btnText.textContent = "Searching...";
+    loader.style.display = "inline-block";
+    graphContainer.html('<p class="p-3 text-muted">Discovering network, please wait...</p>');

    fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`)
-        .then(response => response.json())
+        .then(response => {
+            if (!response.ok) throw new Error("Search failed");
+            return response.json();
+        })
        .then(data => {
            graphContainer.html(''); // clear
            renderGraph(data);
+        })
+        .catch(err => {
+            graphContainer.html(`<p class="p-3 text-danger">Error: ${err.message}</p>`);
+        })
+        .finally(() => {
+            searchBtn.disabled = false;
+            btnText.textContent = "Discover";
+            loader.style.display = "none";
        });
 });

@@ -135,4 +160,4 @@ function renderGraph(data) {
    }
 }
 </script>
-{% endblock %}
+{% endblock %}