This commit is contained in:
Flatlogic Bot 2026-03-23 00:16:45 +00:00
parent 4fda4c24bb
commit 5fa2cf7ba7
3 changed files with 20 additions and 17 deletions

View File

@@ -2,7 +2,7 @@ import requests
import logging import logging
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from core.models import Entity, Relationship, Source from core.models import Entity, Relationship, Source
from urllib.parse import urljoin, quote from urllib.parse import urljoin, quote, unquote
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -16,7 +16,7 @@ class WebCrawler:
def fetch_url(self, url): def fetch_url(self, url):
"""Fetch URL, extract title, meta description, and top images.""" """Fetch URL, extract title, meta description, and top images."""
try: try:
logger.info(f"Crawling page: {url}") logger.info(f"CRAWLER: Fetching {url}")
response = self.session.get(url, timeout=10) response = self.session.get(url, timeout=10)
response.raise_for_status() response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
@@ -34,29 +34,33 @@ class WebCrawler:
return {"title": title, "description": description}, images return {"title": title, "description": description}, images
except Exception as e: except Exception as e:
logger.warning(f"Crawling failed for {url}: {e}") logger.error(f"CRAWLER ERROR: {url}: {e}")
return None, [] return None, []
def search(self, query):
    """Perform a DuckDuckGo HTML search and return result links.

    Args:
        query: Free-text search string; it is URL-quoted before use.

    Returns:
        list[dict]: One ``{"title": ..., "url": ...}`` dict per result,
        or an empty list if the request or parsing fails.
    """
    search_url = f"https://duckduckgo.com/html/?q={quote(query)}"
    try:
        response = self.session.get(search_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for res in soup.find_all("div", class_="result"):
            a_tag = res.find("a", class_="result__a")
            if a_tag and a_tag.get("href"):
                href = a_tag.get("href")
                # DuckDuckGo wraps outbound links in a redirect of the form
                # //duckduckgo.com/l/?uddg=<percent-encoded target>&...;
                # unwrap it to recover the real destination URL.
                if "uddg=" in href:
                    url = unquote(href.split("uddg=")[1].split("&")[0])
                else:
                    url = href
                results.append({"title": a_tag.get_text(), "url": url})
        return results
    except Exception as e:
        # Best-effort boundary: log and degrade to "no results" rather
        # than propagate network/parse errors to the caller.
        logger.error(f"SEARCH ERROR: {query}: {e}")
        return []
class NetworkDiscoveryService: class NetworkDiscoveryService:
@@ -96,8 +100,7 @@ class NetworkDiscoveryService:
class EntityResolutionService:
    """Resolves an entity lookup request by delegating to network discovery."""

    @staticmethod
    def resolve(data):
        """Resolve an entity from a request payload.

        Args:
            data: Mapping expected to contain a ``'query'`` key.

        Returns:
            The result of ``NetworkDiscoveryService.perform_osint_search``
            for a truthy query, otherwise ``None``.
        """
        query = data.get('query')
        if query:
            return NetworkDiscoveryService.perform_osint_search(query)
        return None

View File

@@ -59,7 +59,7 @@ document.getElementById('searchForm').addEventListener('submit', function(e) {
loader.style.display = "inline-block"; loader.style.display = "inline-block";
graphContainer.html('<p class="p-3 text-muted">Discovering network, please wait...</p>'); graphContainer.html('<p class="p-3 text-muted">Discovering network, please wait...</p>');
fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`) fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`, { method: 'GET' })
.then(response => { .then(response => {
if (!response.ok) throw new Error("Search failed"); if (!response.ok) throw new Error("Search failed");
return response.json(); return response.json();