Initial import
This commit is contained in:
commit
cb0542cc7c
BIN
__MACOSX/hackathon/._.DS_Store
Normal file
BIN
__MACOSX/hackathon/._.DS_Store
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._go.mod
Normal file
BIN
__MACOSX/hackathon/._go.mod
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._go.sum
Normal file
BIN
__MACOSX/hackathon/._go.sum
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._mentions.parquet
Normal file
BIN
__MACOSX/hackathon/._mentions.parquet
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._sentiment.py
Normal file
BIN
__MACOSX/hackathon/._sentiment.py
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._tsallis.py
Normal file
BIN
__MACOSX/hackathon/._tsallis.py
Normal file
Binary file not shown.
BIN
__MACOSX/hackathon/._yahooscrape.go
Normal file
BIN
__MACOSX/hackathon/._yahooscrape.go
Normal file
Binary file not shown.
BIN
hackathon/.DS_Store
vendored
Normal file
BIN
hackathon/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
hackathon/__pycache__/sentiment.cpython-310.pyc
Normal file
BIN
hackathon/__pycache__/sentiment.cpython-310.pyc
Normal file
Binary file not shown.
24
hackathon/go.mod
Normal file
24
hackathon/go.mod
Normal file
@ -0,0 +1,24 @@
|
||||
module finnhub_scraper
|
||||
|
||||
go 1.25.0
|
||||
|
||||
require (
|
||||
github.com/vartanbeno/go-reddit/v2 v2.0.1
|
||||
github.com/xitongsys/parquet-go v1.6.2
|
||||
github.com/xitongsys/parquet-go-source v0.0.0-20241021075129-b732d2ac9c9b
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect
|
||||
github.com/apache/thrift v0.14.2 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/golang/snappy v0.0.3 // indirect
|
||||
github.com/google/go-querystring v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.15.9 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.8 // indirect
|
||||
golang.org/x/net v0.10.0 // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.28.0 // indirect
|
||||
)
|
||||
1050
hackathon/go.sum
Normal file
1050
hackathon/go.sum
Normal file
File diff suppressed because it is too large
Load Diff
BIN
hackathon/mentions.parquet
Normal file
BIN
hackathon/mentions.parquet
Normal file
Binary file not shown.
92
hackathon/sentiment.py
Normal file
92
hackathon/sentiment.py
Normal file
@ -0,0 +1,92 @@
|
||||
import pyarrow.parquet as pq
|
||||
from datetime import datetime, date, timedelta
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import subprocess
|
||||
|
||||
def run_go_scraper(file_name: str, target_date: str, days: int, ticker: str, stock: str):
    """Invoke the Go scraper as a subprocess, forwarding run parameters as flags.

    Prints the scraper's stdout on success, or its stderr on failure.
    Returns None either way; the scraper's side effect is mentions.parquet.
    """
    args = [
        "go", "run", file_name,
        f"-start={target_date}",
        f"-days={days}",
        f"-ticker={ticker}",
        f"-stock={stock}",
    ]
    print(f"Executing: {' '.join(args)}")

    proc = subprocess.run(args, capture_output=True, text=True)
    if proc.returncode != 0:
        print("Scraper Error:", proc.stderr)
        return
    print("Scraper Success!")
    print(proc.stdout)
|
||||
|
||||
# Load the FinBERT model and tokenizer once at import time so every call to
# calculate_weighted_sentiment() reuses the same pipeline instead of paying
# the (large) model-load cost per call.
MODEL_NAME = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
||||
|
||||
def calculate_weighted_sentiment(file_path, end_date_str, num_days):
    """Hype/recency-weighted sentiment over the `num_days` ending at `end_date_str`.

    Reads (title, date) rows from the parquet file, scores each day's headlines
    with the module-level FinBERT pipeline, then averages daily sentiment using
    hype (relative mention volume) and recency weights.

    Args:
        file_path: Path to a parquet file with "title" and "date" columns
            (dates formatted YYYY-MM-DD, as written by yahooscrape.go).
        end_date_str: Last day of the window, "YYYY-MM-DD".
        num_days: Number of calendar days in the window.

    Returns:
        A float (negatives are double-weighted, so roughly in [-2, 2]),
        or 0.0 when there is nothing to score.
    """
    label_sign = {"negative": -1, "neutral": 0, "positive": 1}
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d").date()

    # 1. Load data and bucket headlines by day.
    table = pq.read_table(file_path, columns=["title", "date"])
    rows = table.to_pylist()

    # Date range, oldest -> newest, ending at end_date.
    date_list = [(end_date - timedelta(days=x)).strftime("%Y-%m-%d") for x in range(num_days)]
    date_list.reverse()

    daily_headlines = {d: [] for d in date_list}
    for row in rows:
        # Skip the scraper's empty placeholder rows and out-of-window dates.
        if row["date"] in daily_headlines and row["title"] and row["title"].strip():
            daily_headlines[row["date"]].append(row["title"])

    # 2. FinBERT sentiment per day (0.0 for days with no usable headlines).
    daily_sentiment_list = []
    for d in date_list:
        current_headlines = daily_headlines[d]
        if not current_headlines:
            daily_sentiment_list.append(0.0)
            continue

        results = nlp(current_headlines)
        day_scores = []
        for r in results:
            # Negative headlines are deliberately double-weighted.
            multiplier = 2 if r["label"] == "negative" else 1
            day_scores.append(multiplier * r["score"] * label_sign[r["label"]])
        daily_sentiment_list.append(np.mean(day_scores))

    # 3. Hype and recency, aligned index-for-index with date_list.
    # BUG FIX: the original derived these from df.groupby('date') over ALL
    # dates present in the parquet file, whose length and order need not match
    # date_list -- causing broadcast errors or silently misaligned weighting.
    # It also counted the scraper's empty placeholder rows as real mentions.
    counts = np.array([len(daily_headlines[d]) for d in date_list], dtype=np.float64)
    if counts.size == 0:
        return 0.0
    # Softmax-style hype: the busiest day gets weight 1, quieter days decay.
    hype_list = np.exp(counts - counts.max())

    # Recency: days before end_date (0 for the final day of the window).
    t_days = np.arange(num_days - 1, -1, -1, dtype=np.float64)
    recency_list = 1.0 - np.tanh(t_days / 3.0)

    # 4. Final score: weighted average of daily sentiment.
    weights = hype_list * recency_list
    denom = np.sum(weights)
    if denom == 0:
        return 0.0
    return float(np.sum(weights * np.array(daily_sentiment_list)) / denom)
|
||||
169
hackathon/tsallis.py
Normal file
169
hackathon/tsallis.py
Normal file
@ -0,0 +1,169 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
from dataclasses import dataclass
|
||||
from sentiment import calculate_weighted_sentiment, run_go_scraper
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# ── Config ────────────────────────────────────────────────────────────────

Q = 0.5               # Tsallis entropic index (q < 1 amplifies fat tails)
LAMBDA_DECAY = 0.97   # Exponential decay base for time weighting of returns
WINDOW = 60           # Rolling window for Parkinson volatility (bars)
N_BINS = 15           # Histogram bins for the return distribution
BIN_SCALE = 3.5       # Bins span ±3.5 × historical std of returns
MULTI_WINDOWS = (30, 60, 120)  # Windows averaged for the multi-scale Tsallis score
PARK_WEIGHT = 0.3     # Blend weight: 0.0 = pure Tsallis, 1.0 = pure Parkinson
MIN_CAL = 120         # Minimum bars required before the first prediction
|
||||
|
||||
# ── Parkinson Range Volatility ────────────────────────────────────────────
|
||||
|
||||
def _parkinson(highs: np.ndarray, lows: np.ndarray) -> np.ndarray:
    """Rolling Parkinson range volatility over WINDOW bars.

    Entries before the window has filled are NaN.
    """
    count = len(highs)
    result = np.full(count, np.nan)

    # Guard against zero/negative lows so the log ratio stays defined,
    # and against inverted bars (high <= low) by substituting a ratio of 1.
    floor_lows = np.where(lows > 0, lows, highs)
    sq_log_range = np.log(np.where(highs > floor_lows, highs / floor_lows, 1.0)) ** 2

    denom = 4.0 * np.log(2.0)
    for bar in range(WINDOW, count):
        result[bar] = np.sqrt(np.mean(sq_log_range[bar - WINDOW:bar]) / denom)
    return result
|
||||
|
||||
def _norm(value: float, past: np.ndarray) -> float:
|
||||
clean = past[~np.isnan(past)]
|
||||
if len(clean) < 20 or np.std(clean) == 0:
|
||||
return 0.5
|
||||
z = (value - np.mean(clean)) / np.std(clean)
|
||||
return float(np.clip(np.tanh(z / 2.0 + 0.5), 0.0, 1.0))
|
||||
|
||||
# ── Tsallis Entropy Core ─────────────────────────────────────────────────
|
||||
|
||||
def _weights(T: int) -> np.ndarray:
    """Exponential decay weights for T observations, oldest first.

    The oldest bar gets LAMBDA_DECAY**T and the newest gets LAMBDA_DECAY**1.
    """
    exponents = np.arange(T, 0, -1, dtype=np.float64)
    return LAMBDA_DECAY ** exponents
|
||||
|
||||
def _weighted_probs(returns: np.ndarray, weights: np.ndarray, edges: np.ndarray) -> np.ndarray:
|
||||
n_bins = len(edges) - 1
|
||||
wp = np.zeros(n_bins, dtype=np.float64)
|
||||
idx = np.clip(np.digitize(returns, edges) - 1, 0, n_bins - 1)
|
||||
np.add.at(wp, idx, weights)
|
||||
s = weights.sum()
|
||||
if s > 0:
|
||||
wp /= s
|
||||
return wp
|
||||
|
||||
def _entropy(wp: np.ndarray) -> float:
    """Tsallis entropy S_q of a discrete distribution (zero-mass bins dropped)."""
    support = wp[wp > 0]
    if len(support) == 0:
        return 0.0
    return (1.0 - np.sum(support ** Q)) / (Q - 1.0)
|
||||
|
||||
def _max_entropy() -> float:
    """Tsallis entropy of the uniform distribution over N_BINS bins (the maximum attainable)."""
    return (N_BINS ** (1.0 - Q) - 1.0) / (1.0 - Q)
|
||||
|
||||
def _tsallis_score(returns: np.ndarray, edges: np.ndarray, window: int) -> float:
    """Normalised Tsallis entropy of the trailing `window` returns, in [0, 1].

    Uses the whole series when it is shorter than `window`; returns 0.0 if the
    theoretical maximum entropy is zero (degenerate configuration).
    """
    tail = returns[-window:] if len(returns) >= window else returns
    probs = _weighted_probs(tail, _weights(len(tail)), edges)
    ceiling = _max_entropy()
    if not ceiling:
        return 0.0
    return float(np.clip(_entropy(probs) / ceiling, 0.0, 1.0))
|
||||
|
||||
# ── Public API ────────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class Result:
    """Bundle of outputs produced by get_score() for one ticker/date window."""
    score: float          # volatility/entropy score in [0, 1]
    sent: float           # weighted news sentiment in [-1, 1]
    trade_signal: float   # sent * score
    regime: str           # "LOW" / "MODERATE" / "HIGH"
|
||||
|
||||
def get_score(ticker_symbol: str, stock_name: str, parquet_path: str, end_dt_str: str, days_lookback: int, volatility_time: int, interval: str = "1h") -> Result:
    """Compute a blended volatility score and news sentiment for one ticker.

    Fetches `volatility_time` days of price bars from Yahoo Finance ending at
    `end_dt_str`, blends a multi-window Tsallis entropy score with Parkinson
    range volatility, and combines it with the FinBERT sentiment derived from
    `parquet_path` over the last `days_lookback` days.

    Args:
        ticker_symbol: Yahoo Finance ticker (e.g. "AAPL").
        stock_name: Company name. NOTE(review): currently unused in this
            function — the headline matching happens in the Go scraper.
        parquet_path: Path to the mentions parquet written by the scraper.
        end_dt_str: End date, "YYYY-MM-DD".
        days_lookback: Sentiment window in days.
        volatility_time: Price-history window in days.
        interval: Bar interval passed to yfinance (default "1h").

    Returns:
        A Result with score (vol, [0,1]), sent, trade_signal and regime.

    Raises:
        ValueError: if yfinance returns no data, or fewer than MIN_CAL bars.
    """
    # Parse the window bounds from the supplied end date.
    end_dt = datetime.strptime(end_dt_str, "%Y-%m-%d")
    start_dt = end_dt - timedelta(days=volatility_time)

    # 2. Fetch price data. NOTE(review): yfinance intraday intervals are only
    # available for a limited trailing period — confirm for long lookbacks.
    df = yf.Ticker(ticker_symbol).history(
        start=start_dt.strftime("%Y-%m-%d"),
        end=end_dt.strftime("%Y-%m-%d"),
        interval=interval,
        auto_adjust=True
    )

    if df.empty:
        raise ValueError(f"No data for '{ticker_symbol}'.")

    # Log returns; the first bar (NaN from the shift) is dropped.
    df["log_return"] = np.log(df["Close"] / df["Close"].shift(1))
    df = df.dropna(subset=["log_return"])

    returns = df["log_return"].values
    highs = df["High"].values
    lows = df["Low"].values

    if len(returns) < MIN_CAL:
        raise ValueError(f"Need {MIN_CAL} bars, got {len(returns)}.")

    # 3. Tsallis volatility: bin edges are calibrated on all but the most
    # recent return (±BIN_SCALE std around the mean), then the entropy score
    # is averaged over every MULTI_WINDOWS size the series can support.
    r = returns[:-1].astype(np.float64)
    half = BIN_SCALE * np.std(r)
    edges = np.linspace(np.mean(r) - half, np.mean(r) + half, N_BINS + 1)
    ts = [_tsallis_score(returns, edges, w) for w in MULTI_WINDOWS if len(returns) >= w]
    tsallis = float(np.mean(ts)) if ts else 0.5

    # 4. Parkinson volatility, normalised against its own history; falls back
    # to the neutral 0.5 when the last value is still in the NaN warm-up.
    park = 0.5
    if PARK_WEIGHT > 0:
        pv = _parkinson(highs, lows)
        if not np.isnan(pv[-1]):
            park = _norm(pv[-1], pv[:-1])

    # 5. Blend the two volatility estimates and attach sentiment.
    vol_score = float(np.clip((1.0 - PARK_WEIGHT) * tsallis + PARK_WEIGHT * park, 0.0, 1.0))

    sent = calculate_weighted_sentiment(parquet_path, end_date_str=end_dt_str, num_days=days_lookback)

    trade_signal = sent * vol_score
    regime = "LOW" if vol_score < 0.3 else "MODERATE" if vol_score < 0.6 else "HIGH"

    return Result(
        score=vol_score,
        sent=sent,
        trade_signal=trade_signal,
        regime=regime
    )
|
||||
|
||||
# ── Execution ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # 1. Gather run parameters interactively.
    ticker = str(input("Stock ticker: "))            # Yahoo/Finnhub ticker symbol
    company = str(input("Stock name: "))             # headline search keyword
    sent_days = int(input("Sentiment lookback: "))
    today = (datetime.now()).strftime("%Y-%m-%d")
    vol_days = int(input("Volatility lookback: "))

    # 2. Refresh mentions.parquet via the Go scraper before scoring.
    run_go_scraper("yahooscrape.go", today, sent_days, ticker, company)

    # 3. Run the analysis pipeline and print the verdict.
    try:
        res = get_score(
            ticker_symbol=ticker,
            stock_name=company,
            parquet_path="mentions.parquet",
            end_dt_str=today,
            days_lookback=sent_days,
            volatility_time=vol_days,
        )

        print(f"\n--- {ticker} ({res.regime} VOLATILITY) ---")
        print(f"Vol Score: {res.score:.4f}")
        print(f"Sentiment: {res.sent:.4f}")
        print(f"Trade Signal: {res.trade_signal:.4f}")

        # Map the volatility score onto a coarse trading verdict.
        if res.score < 0.3:
            print("Verdict: SIT OUT (Low Activity)")
        elif res.score > 0.6:
            side = "BUY" if res.trade_signal > 0 else "SELL"
            print(f"Verdict: FULL CONVICTION {side}")
        else:
            print("Verdict: MODERATE (Monitor/Small Position)")

    except Exception as e:
        print(f"Pipeline Error: {e}")
|
||||
111
hackathon/yahooscrape.go
Normal file
111
hackathon/yahooscrape.go
Normal file
@ -0,0 +1,111 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/xitongsys/parquet-go-source/local"
|
||||
"github.com/xitongsys/parquet-go/writer"
|
||||
)
|
||||
|
||||
// FinnhubNews is the subset of a Finnhub /company-news response item
// that this scraper consumes.
type FinnhubNews struct {
	Datetime int64  `json:"datetime"` // publication time, Unix seconds
	Headline string `json:"headline"` // article headline text
	Source   string `json:"source"`   // publishing outlet name
}
|
||||
|
||||
// NewsRecord is one row of mentions.parquet, the file read back by
// sentiment.py (columns: title, source, date).
type NewsRecord struct {
	Title  string `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
	Source string `parquet:"name=source, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
	// Date is formatted YYYY-MM-DD to match sentiment.py's date_list keys.
	Date string `parquet:"name=date, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
}
|
||||
|
||||
func main() {
|
||||
// 1. Define flags for input
|
||||
startDatePtr := flag.String("start", "Na", "Start date in YYYY-MM-DD format")
|
||||
daysPtr := flag.Int("days", 0, "Number of days to look back")
|
||||
|
||||
tickerPtr := flag.String("ticker", "", "Stock Ticker")
|
||||
stockPtr := flag.String("stock", "", "Stock Name")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
apiKey := "d6hkh6hr01qr5k4cb93gd6hkh6hr01qr5k4cb940"
|
||||
|
||||
// 2. Parse the input start date
|
||||
baseDate, err := time.Parse("2006-01-02", *startDatePtr)
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid date format. Use YYYY-MM-DD: %v", err)
|
||||
}
|
||||
|
||||
fw, err := local.NewLocalFileWriter("mentions.parquet")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
pw, err := writer.NewParquetWriter(fw, new(NewsRecord), 4)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < *daysPtr; i++ {
|
||||
// 3. Subtract days from the PROVIDED start date instead of 'Now'
|
||||
currentDate := baseDate.AddDate(0, 0, -i)
|
||||
dateStr := currentDate.Format("2006-01-02")
|
||||
|
||||
fmt.Printf("Checking %s... ", dateStr)
|
||||
|
||||
// Updated symbol parameter to 'ticker'
|
||||
url := fmt.Sprintf(
|
||||
"https://finnhub.io/api/v1/company-news?symbol=%s&from=%s&to=%s&token=%s",
|
||||
*tickerPtr, dateStr, dateStr, apiKey,
|
||||
)
|
||||
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Printf("Network Error: %v\n", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var news []FinnhubNews
|
||||
if err := json.NewDecoder(resp.Body).Decode(&news); err != nil {
|
||||
resp.Body.Close()
|
||||
fmt.Printf("Decode Error\n")
|
||||
continue
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
matchCount := 0
|
||||
for _, article := range news {
|
||||
h := strings.ToLower(article.Headline)
|
||||
|
||||
// Dynamically check for the stock name or ticker passed in via flags
|
||||
if strings.Contains(h, strings.ToLower(*stockPtr)) || strings.Contains(h, strings.ToLower(*tickerPtr)) {
|
||||
record := NewsRecord{
|
||||
Title: article.Headline,
|
||||
Source: article.Source,
|
||||
Date: dateStr,
|
||||
}
|
||||
pw.Write(record)
|
||||
matchCount++
|
||||
}
|
||||
}
|
||||
|
||||
if matchCount == 0 {
|
||||
fmt.Print("No matches. Writing placeholder.")
|
||||
pw.Write(NewsRecord{Title: "", Source: "NONE", Date: dateStr})
|
||||
} else {
|
||||
fmt.Printf("Found %d matches.", matchCount)
|
||||
}
|
||||
fmt.Println()
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
pw.WriteStop()
|
||||
fw.Close()
|
||||
fmt.Println("\nFinished! Check mentions.parquet")
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user