Initial import

This commit is contained in:
Flatlogic Bot 2026-03-01 01:53:03 +00:00
commit b54ba42545
15 changed files with 1446 additions and 0 deletions

Binary file not shown.

BIN
__MACOSX/hackathon/._go.mod Normal file

Binary file not shown.

BIN
__MACOSX/hackathon/._go.sum Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
hackathon/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

24
hackathon/go.mod Normal file
View File

@ -0,0 +1,24 @@
module finnhub_scraper
go 1.25.0
require (
github.com/vartanbeno/go-reddit/v2 v2.0.1
github.com/xitongsys/parquet-go v1.6.2
github.com/xitongsys/parquet-go-source v0.0.0-20241021075129-b732d2ac9c9b
)
require (
github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect
github.com/apache/thrift v0.14.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.3 // indirect
github.com/google/go-querystring v1.0.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/pierrec/lz4/v4 v4.1.8 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.0 // indirect
)

1050
hackathon/go.sum Normal file

File diff suppressed because it is too large Load Diff

BIN
hackathon/mentions.parquet Normal file

Binary file not shown.

92
hackathon/sentiment.py Normal file
View File

@ -0,0 +1,92 @@
import pyarrow.parquet as pq
from datetime import datetime, date, timedelta
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd
import numpy as np
import subprocess
def run_go_scraper(file_name: str, target_date: str, days: int, ticker: str, stock: str):
    """Invoke the Go scraper via `go run` and report its outcome.

    Passes the window start date, lookback days, ticker symbol and stock
    search keyword as CLI flags; prints stdout on success, stderr on failure.
    """
    args = [
        "go", "run", file_name,
        f"-start={target_date}",
        f"-days={days}",
        f"-ticker={ticker}",
        f"-stock={stock}",
    ]
    print(f"Executing: {' '.join(args)}")
    completed = subprocess.run(args, capture_output=True, text=True)
    if completed.returncode != 0:
        print("Scraper Error:", completed.stderr)
    else:
        print("Scraper Success!")
        print(completed.stdout)
# Load the FinBERT model and tokenizer once at module import time so that
# repeated calls to calculate_weighted_sentiment() reuse the same pipeline
# instead of re-downloading/re-initializing the model per call.
MODEL_NAME = "ProsusAI/finbert"  # financial-domain sentiment model (positive/negative/neutral labels)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
def calculate_weighted_sentiment(file_path, end_date_str, num_days):
    """Score news sentiment over a date window, weighted by hype and recency.

    Args:
        file_path: Parquet file with at least 'title' and 'date' (YYYY-MM-DD) columns.
        end_date_str: Window end date, "YYYY-MM-DD" (inclusive).
        num_days: Number of days in the window, counting back from end_date_str.

    Returns:
        Hype/recency-weighted mean of daily FinBERT sentiment (negatives are
        double-weighted, so roughly in [-2, 1]); 0.0 for an empty window.
    """
    label_sign = {"negative": -1, "neutral": 0, "positive": 1}
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

    # Guard the empty window so max()/division below cannot blow up.
    if num_days <= 0:
        return 0.0

    # 1. Load and filter data.
    table = pq.read_table(file_path, columns=["title", "date"])
    rows = table.to_pylist()

    # Date range, oldest to newest.
    date_list = [(end_date - timedelta(days=x)).strftime("%Y-%m-%d") for x in range(num_days)]
    date_list.reverse()

    # Group non-blank headlines by date (the scraper writes placeholder rows
    # with empty titles on no-match days; skip those).
    daily_headlines = {d: [] for d in date_list}
    for row in rows:
        if row["date"] in daily_headlines and row["title"] and row["title"].strip():
            daily_headlines[row["date"]].append(row["title"])

    # 2. Per-day sentiment: mean signed FinBERT score.
    daily_sentiment_list = []
    for d in date_list:
        headlines = daily_headlines[d]
        if not headlines:
            daily_sentiment_list.append(0.0)
            continue
        results = nlp(headlines)
        day_scores = []
        for r in results:
            multiplier = 2 if r["label"] == "negative" else 1  # penalize bad news harder
            day_scores.append(multiplier * r["score"] * label_sign[r["label"]])
        daily_sentiment_list.append(np.mean(day_scores))

    # 3. Hype and recency weights.
    # FIX: previously counts came from df.groupby over *all* dates present in
    # the file (sorted, possibly missing days), so the hype/recency arrays
    # could misalign with daily_sentiment_list (length mismatch or shifted
    # dates). Reindex the counts onto the exact window instead.
    df = pd.read_parquet(file_path)
    daily_counts = df.groupby('date').size().reindex(date_list, fill_value=0)
    hype_list = np.exp(daily_counts.to_numpy() - daily_counts.max())  # 1.0 on the busiest day
    # Recency measured against the window end (not the last date in the file),
    # oldest day = num_days-1 days back; tanh squashes old days toward 0.
    t_days = np.arange(num_days - 1, -1, -1)
    recency_list = 1 - np.tanh(t_days / 3)

    # 4. Final score: weighted average of daily sentiment.
    weights = hype_list * recency_list
    if np.sum(weights) == 0:
        return 0.0
    return float(np.sum(weights * np.array(daily_sentiment_list)) / np.sum(weights))

169
hackathon/tsallis.py Normal file
View File

@ -0,0 +1,169 @@
import numpy as np
import pandas as pd
import yfinance as yf
from dataclasses import dataclass
from sentiment import calculate_weighted_sentiment, run_go_scraper
from datetime import datetime, timedelta
# ── Config ────────────────────────────────────────────────────────────────
Q = 0.5 # Tsallis entropic index (q<1 amplifies fat tails)
LAMBDA_DECAY = 0.97 # Exponential decay for time weighting (newest bar weighted highest)
WINDOW = 60 # Rolling window for the Parkinson estimator (bars)
N_BINS = 15 # Histogram bins for the return distribution
BIN_SCALE = 3.5 # Bins span ±3.5 × historical std of returns
MULTI_WINDOWS = (30, 60, 120) # Windows (bars) averaged for the multi-scale Tsallis score
PARK_WEIGHT = 0.3 # Blend weight: 0.0 = pure Tsallis, 1.0 = pure Parkinson
MIN_CAL = 120 # Minimum bars of history before first prediction
# ── Parkinson Range Volatility ────────────────────────────────────────────
def _parkinson(highs: np.ndarray, lows: np.ndarray) -> np.ndarray:
    """Rolling Parkinson high/low range volatility estimator.

    Returns an array aligned with the inputs; entries before WINDOW bars are
    NaN. Non-positive lows are replaced by the corresponding high so the log
    ratio stays defined (the squared term then contributes 0).
    """
    n = len(highs)
    result = np.full(n, np.nan)
    guarded_lows = np.where(lows > 0, lows, highs)
    ratio = np.where(highs > guarded_lows, highs / guarded_lows, 1.0)
    sq_log_range = np.log(ratio) ** 2
    for bar in range(WINDOW, n):
        window_mean = np.mean(sq_log_range[bar - WINDOW:bar])
        result[bar] = np.sqrt(window_mean / (4.0 * np.log(2.0)))
    return result
def _norm(value: float, past: np.ndarray) -> float:
clean = past[~np.isnan(past)]
if len(clean) < 20 or np.std(clean) == 0:
return 0.5
z = (value - np.mean(clean)) / np.std(clean)
return float(np.clip(np.tanh(z / 2.0 + 0.5), 0.0, 1.0))
# ── Tsallis Entropy Core ─────────────────────────────────────────────────
def _weights(T: int) -> np.ndarray:
    """Exponential-decay weights for T bars; the newest bar gets the largest weight."""
    exponents = np.arange(T, 0, -1, dtype=np.float64)
    return np.power(LAMBDA_DECAY, exponents)
def _weighted_probs(returns: np.ndarray, weights: np.ndarray, edges: np.ndarray) -> np.ndarray:
n_bins = len(edges) - 1
wp = np.zeros(n_bins, dtype=np.float64)
idx = np.clip(np.digitize(returns, edges) - 1, 0, n_bins - 1)
np.add.at(wp, idx, weights)
s = weights.sum()
if s > 0:
wp /= s
return wp
def _entropy(wp: np.ndarray) -> float:
    """Tsallis entropy of a probability vector; zero-probability bins are ignored."""
    support = wp[wp > 0]
    if support.size == 0:
        return 0.0
    return (1.0 - np.sum(support ** Q)) / (Q - 1.0)
def _max_entropy() -> float:
    """Tsallis entropy of the uniform distribution over N_BINS bins (normalization ceiling)."""
    exponent = 1.0 - Q
    return (N_BINS ** exponent - 1.0) / exponent
def _tsallis_score(returns: np.ndarray, edges: np.ndarray, window: int) -> float:
    """Normalized Tsallis entropy of the last `window` returns, clipped to [0, 1]."""
    recent = returns if len(returns) < window else returns[-window:]
    probs = _weighted_probs(recent, _weights(len(recent)), edges)
    ceiling = _max_entropy()
    if not ceiling:
        return 0.0
    return float(np.clip(_entropy(probs) / ceiling, 0.0, 1.0))
# ── Public API ────────────────────────────────────────────────────────────
@dataclass
class Result:
    """Bundle of volatility score, news sentiment, combined signal and regime label."""
    score: float # Volatility/Entropy score in [0, 1]
    sent: float # News sentiment (negative values bearish, positive bullish)
    trade_signal: float # sentiment * score
    regime: str # LOW / MODERATE / HIGH volatility regime
def get_score(ticker_symbol: str, stock_name: str, parquet_path: str, end_dt_str: str, days_lookback: int, volatility_time: int, interval: str = "1h") -> Result:
    """Fetch price history, blend Tsallis and Parkinson volatility with news
    sentiment, and return a Result.

    Args:
        ticker_symbol: Yahoo Finance ticker to pull prices for.
        stock_name: Search keyword (currently unused here — the Go scraper
            consumes it; kept for interface stability).
        parquet_path: Path to the scraped mentions parquet file.
        end_dt_str: Analysis end date, "YYYY-MM-DD".
        days_lookback: Sentiment window length in days.
        volatility_time: Price-history window length in days.
        interval: Bar interval passed to yfinance.

    Raises:
        ValueError: when no price data is returned or fewer than MIN_CAL bars exist.
    """
    end_dt = datetime.strptime(end_dt_str, "%Y-%m-%d")
    start_dt = end_dt - timedelta(days=volatility_time)

    # Price history for the volatility window.
    history = yf.Ticker(ticker_symbol).history(
        start=start_dt.strftime("%Y-%m-%d"),
        end=end_dt.strftime("%Y-%m-%d"),
        interval=interval,
        auto_adjust=True,
    )
    if history.empty:
        raise ValueError(f"No data for '{ticker_symbol}'.")

    history["log_return"] = np.log(history["Close"] / history["Close"].shift(1))
    history = history.dropna(subset=["log_return"])
    returns = history["log_return"].values
    highs = history["High"].values
    lows = history["Low"].values
    if len(returns) < MIN_CAL:
        raise ValueError(f"Need {MIN_CAL} bars, got {len(returns)}.")

    # Tsallis component: bin edges built from all but the latest return.
    past = returns[:-1].astype(np.float64)
    center = np.mean(past)
    half_span = BIN_SCALE * np.std(past)
    edges = np.linspace(center - half_span, center + half_span, N_BINS + 1)
    scores = [_tsallis_score(returns, edges, w) for w in MULTI_WINDOWS if len(returns) >= w]
    tsallis = float(np.mean(scores)) if scores else 0.5

    # Parkinson component (neutral 0.5 unless it can be computed).
    park = 0.5
    if PARK_WEIGHT > 0:
        pv = _parkinson(highs, lows)
        if not np.isnan(pv[-1]):
            park = _norm(pv[-1], pv[:-1])

    # Blend the two estimators, then attach sentiment over the same end date.
    blended = (1.0 - PARK_WEIGHT) * tsallis + PARK_WEIGHT * park
    vol_score = float(np.clip(blended, 0.0, 1.0))
    sent = calculate_weighted_sentiment(parquet_path, end_date_str=end_dt_str, num_days=days_lookback)

    if vol_score < 0.3:
        regime = "LOW"
    elif vol_score < 0.6:
        regime = "MODERATE"
    else:
        regime = "HIGH"

    return Result(
        score=vol_score,
        sent=sent,
        trade_signal=sent * vol_score,
        regime=regime,
    )
# ── Execution ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Gather run parameters interactively.
    target_ticker = str(input("Stock ticker: "))
    target_stock = str(input("Stock name: "))
    lookback = int(input("Sentiment lookback: "))
    run_date = (datetime.now()).strftime("%Y-%m-%d")
    volatility_lookback = int(input("Volatility lookback: "))

    # Refresh mentions.parquet via the Go scraper before scoring.
    run_go_scraper("yahooscrape.go", run_date, lookback, target_ticker, target_stock)

    # Run the analysis pipeline; any failure is reported, not raised.
    try:
        res = get_score(
            ticker_symbol=target_ticker,
            stock_name=target_stock,
            parquet_path="mentions.parquet",
            end_dt_str=run_date,
            days_lookback=lookback,
            volatility_time=volatility_lookback,
        )
        print(f"\n--- {target_ticker} ({res.regime} VOLATILITY) ---")
        print(f"Vol Score: {res.score:.4f}")
        print(f"Sentiment: {res.sent:.4f}")
        print(f"Trade Signal: {res.trade_signal:.4f}")

        # Verdict: act only when volatility is high enough to matter.
        if res.score < 0.3:
            print("Verdict: SIT OUT (Low Activity)")
        elif res.score > 0.6:
            action = "BUY" if res.trade_signal > 0 else "SELL"
            print(f"Verdict: FULL CONVICTION {action}")
        else:
            print("Verdict: MODERATE (Monitor/Small Position)")
    except Exception as e:
        print(f"Pipeline Error: {e}")

111
hackathon/yahooscrape.go Normal file
View File

@ -0,0 +1,111 @@
package main
import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/xitongsys/parquet-go-source/local"
	"github.com/xitongsys/parquet-go/writer"
)
// FinnhubNews is the subset of Finnhub's /company-news JSON response
// that the scraper consumes.
type FinnhubNews struct {
	Datetime int64 `json:"datetime"`
	Headline string `json:"headline"`
	Source string `json:"source"`
}

// NewsRecord is the parquet row schema written to mentions.parquet;
// the Python pipeline reads the title and date columns.
type NewsRecord struct {
	Title string `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
	Source string `parquet:"name=source, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
	Date string `parquet:"name=date, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
}
// main scrapes Finnhub company news day-by-day (walking backwards from the
// -start date) and writes headlines matching the ticker or stock name to
// mentions.parquet for the Python sentiment pipeline.
func main() {
	// 1. CLI flags supplied by sentiment.run_go_scraper.
	startDatePtr := flag.String("start", "Na", "Start date in YYYY-MM-DD format")
	daysPtr := flag.Int("days", 0, "Number of days to look back")
	tickerPtr := flag.String("ticker", "", "Stock Ticker")
	stockPtr := flag.String("stock", "", "Stock Name")
	flag.Parse()

	// SECURITY: prefer the FINNHUB_TOKEN env var; the inline literal is a
	// fallback only and should be rotated/removed before sharing this code.
	apiKey := os.Getenv("FINNHUB_TOKEN")
	if apiKey == "" {
		apiKey = "d6hkh6hr01qr5k4cb93gd6hkh6hr01qr5k4cb940"
	}

	// 2. Parse the input start date.
	baseDate, err := time.Parse("2006-01-02", *startDatePtr)
	if err != nil {
		log.Fatalf("Invalid date format. Use YYYY-MM-DD: %v", err)
	}

	fw, err := local.NewLocalFileWriter("mentions.parquet")
	if err != nil {
		log.Fatal(err)
	}
	pw, err := writer.NewParquetWriter(fw, new(NewsRecord), 4)
	if err != nil {
		log.Fatal(err)
	}

	// Lowercase the search terms once, outside the loop.
	stockLower := strings.ToLower(*stockPtr)
	tickerLower := strings.ToLower(*tickerPtr)

	for i := 0; i < *daysPtr; i++ {
		// 3. Walk backwards from the provided start date.
		currentDate := baseDate.AddDate(0, 0, -i)
		dateStr := currentDate.Format("2006-01-02")
		fmt.Printf("Checking %s... ", dateStr)

		url := fmt.Sprintf(
			"https://finnhub.io/api/v1/company-news?symbol=%s&from=%s&to=%s&token=%s",
			*tickerPtr, dateStr, dateStr, apiKey,
		)
		resp, err := http.Get(url)
		if err != nil {
			fmt.Printf("Network Error: %v\n", err)
			continue
		}
		// FIX: surface non-200 responses (rate limit, bad token) explicitly
		// instead of letting them show up as a generic decode failure.
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			fmt.Printf("HTTP %d from Finnhub\n", resp.StatusCode)
			continue
		}
		var news []FinnhubNews
		decodeErr := json.NewDecoder(resp.Body).Decode(&news)
		resp.Body.Close()
		if decodeErr != nil {
			fmt.Printf("Decode Error\n")
			continue
		}

		matchCount := 0
		for _, article := range news {
			h := strings.ToLower(article.Headline)
			// Match either the stock name keyword or the ticker symbol.
			if strings.Contains(h, stockLower) || strings.Contains(h, tickerLower) {
				record := NewsRecord{
					Title:  article.Headline,
					Source: article.Source,
					Date:   dateStr,
				}
				// FIX: the write error was previously discarded (silent data loss).
				if err := pw.Write(record); err != nil {
					log.Printf("parquet write failed: %v", err)
				}
				matchCount++
			}
		}
		if matchCount == 0 {
			fmt.Print("No matches. Writing placeholder.")
			// Placeholder row keeps every requested date present downstream.
			if err := pw.Write(NewsRecord{Title: "", Source: "NONE", Date: dateStr}); err != nil {
				log.Printf("parquet write failed: %v", err)
			}
		} else {
			fmt.Printf("Found %d matches.", matchCount)
		}
		fmt.Println()
		time.Sleep(1 * time.Second) // stay under Finnhub's request rate limit
	}

	// FIX: check finalization errors so a truncated parquet file is not
	// reported as success.
	if err := pw.WriteStop(); err != nil {
		log.Fatalf("parquet finalize failed: %v", err)
	}
	if err := fw.Close(); err != nil {
		log.Fatalf("file close failed: %v", err)
	}
	fmt.Println("\nFinished! Check mentions.parquet")
}