RIPLEY
This commit is contained in:
parent
4fda4c24bb
commit
5fa2cf7ba7
Binary file not shown.
@ -2,7 +2,7 @@ import requests
|
|||||||
import logging
|
import logging
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from core.models import Entity, Relationship, Source
|
from core.models import Entity, Relationship, Source
|
||||||
from urllib.parse import urljoin, quote
|
from urllib.parse import urljoin, quote, unquote
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -16,7 +16,7 @@ class WebCrawler:
|
|||||||
def fetch_url(self, url):
|
def fetch_url(self, url):
|
||||||
"""Fetch URL, extract title, meta description, and top images."""
|
"""Fetch URL, extract title, meta description, and top images."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Crawling page: {url}")
|
logger.info(f"CRAWLER: Fetching {url}")
|
||||||
response = self.session.get(url, timeout=10)
|
response = self.session.get(url, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
@ -34,29 +34,33 @@ class WebCrawler:
|
|||||||
|
|
||||||
return {"title": title, "description": description}, images
|
return {"title": title, "description": description}, images
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Crawling failed for {url}: {e}")
|
logger.error(f"CRAWLER ERROR: {url}: {e}")
|
||||||
return None, []
|
return None, []
|
||||||
|
|
||||||
def search(self, query):
|
def search(self, query):
|
||||||
"""Perform a Google search."""
|
"""Perform a DuckDuckGo search."""
|
||||||
search_url = f"https://www.google.com/search?q={quote(query)}"
|
search_url = f"https://duckduckgo.com/html/?q={quote(query)}"
|
||||||
try:
|
try:
|
||||||
response = self.session.get(search_url, timeout=10)
|
response = self.session.get(search_url, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for g in soup.select("div.g"):
|
for res in soup.find_all("div", class_="result"):
|
||||||
title_elem = g.select_one("h3")
|
a_tag = res.find("a", class_="result__a")
|
||||||
link_elem = g.select_one("a")
|
if a_tag and a_tag.get("href"):
|
||||||
if title_elem and link_elem:
|
href = a_tag.get("href")
|
||||||
url = link_elem.get("href")
|
# Extract real URL from DDG redirection
|
||||||
if url.startswith("/url?q="):
|
url = ""
|
||||||
url = url.split("/url?q=")[1].split("&")[0]
|
if "uddg=" in href:
|
||||||
results.append({"title": title_elem.get_text(), "url": url})
|
url = unquote(href.split("uddg=")[1].split("&")[0])
|
||||||
|
else:
|
||||||
|
url = href
|
||||||
|
|
||||||
|
results.append({"title": a_tag.get_text(), "url": url})
|
||||||
return results
|
return results
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Search failed for {query}: {e}")
|
logger.error(f"SEARCH ERROR: {query}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
class NetworkDiscoveryService:
|
class NetworkDiscoveryService:
|
||||||
@ -96,7 +100,6 @@ class NetworkDiscoveryService:
|
|||||||
class EntityResolutionService:
|
class EntityResolutionService:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve(data):
|
def resolve(data):
|
||||||
# Fallback to NetworkDiscoveryService for now
|
|
||||||
query = data.get('query')
|
query = data.get('query')
|
||||||
if query:
|
if query:
|
||||||
return NetworkDiscoveryService.perform_osint_search(query)
|
return NetworkDiscoveryService.perform_osint_search(query)
|
||||||
|
|||||||
@ -59,7 +59,7 @@ document.getElementById('searchForm').addEventListener('submit', function(e) {
|
|||||||
loader.style.display = "inline-block";
|
loader.style.display = "inline-block";
|
||||||
graphContainer.html('<p class="p-3 text-muted">Discovering network, please wait...</p>');
|
graphContainer.html('<p class="p-3 text-muted">Discovering network, please wait...</p>');
|
||||||
|
|
||||||
fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`)
|
fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`, { method: 'GET' })
|
||||||
.then(response => {
|
.then(response => {
|
||||||
if (!response.ok) throw new Error("Search failed");
|
if (!response.ok) throw new Error("Search failed");
|
||||||
return response.json();
|
return response.json();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user