#!/usr/bin/env python3

import ftplib
import os
import re
import unicodedata
from difflib import SequenceMatcher
from pathlib import Path

# ============================================================
# CONFIG
# ============================================================

# Connection settings. Each value can be overridden through the environment
# (DJ_FTP_HOST, DJ_FTP_USER, DJ_FTP_PASS, DJ_FTP_PORT) so credentials do not
# have to live in this file; the literals below are only fallbacks.
# NOTE(review): a password is committed here in plain text — rotate it and
# remove the fallback once the environment variables are in place.
FTP_HOST = os.environ.get("DJ_FTP_HOST", "ftp.djdownload.me")
FTP_USER = os.environ.get("DJ_FTP_USER", "ludovic.m.santos")
FTP_PASS = os.environ.get("DJ_FTP_PASS", "3af32ssH")
FTP_PORT = int(os.environ.get("DJ_FTP_PORT", "21"))

# ⚠️ IMPORTANT: real FTP jail root
FTP_BASE = "/home/ftp.djdownload.me"

# Year folders (relative to FTP_BASE) that the scan walks, in this order.
YEARS = [
    "2024-DJdownload",
    "2025-DJdownload",
    "2026-DJdownload",
]

# Local folder holding one sub-folder per genre with the shortlisted tracks.
SHORTLIST_DIR = Path("/volume1/dj-ai/shortlist")

# Must stay a tuple: it is passed directly to str.endswith().
AUDIO_EXT = (".mp3", ".wav")
MATCH_THRESHOLD = 0.80   # title-only match

# ============================================================
# HELPERS
# ============================================================

def normalize(name: str) -> str:
    """
    Normalize a track name to a comparable, TITLE-ONLY key.

    Steps, in order: fold accents and case, drop everything before the
    first " - " (treated as the artist), remove "(...)" and "[...]" groups,
    remove release-noise words, then collapse every run of characters
    outside a-z / 0-9 into a single space.

    Returns the cleaned title; this can be an empty string when the input
    held nothing but noise.
    """
    # Decompose accented letters and drop the combining marks so that
    # "Déjà Vu" and "Deja Vu" give the same key; without this the final
    # [^a-z0-9] pass would split the word into "d j vu".
    name = unicodedata.normalize("NFKD", name)
    name = "".join(ch for ch in name if not unicodedata.combining(ch))
    name = name.lower()

    # drop artist if present
    if " - " in name:
        name = name.split(" - ", 1)[1]

    # remove brackets
    name = re.sub(r"\(.*?\)", "", name)
    name = re.sub(r"\[.*?\]", "", name)

    # remove junk tokens
    name = re.sub(
        r"\b(djd|exclusive|promo|extended|original|mix|edit|version|wav|mp3)\b",
        "",
        name
    )

    # everything that is not a lowercase letter or digit becomes a separator
    name = re.sub(r"[^a-z0-9]+", " ", name)
    return name.strip()


def similarity(a: str, b: str) -> float:
    """Return the difflib SequenceMatcher ratio of *a* and *b* (0.0 to 1.0)."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

# ============================================================
# LOAD SHORTLIST
# ============================================================

def load_shortlist():
    """
    Collect the shortlisted audio files, one level below SHORTLIST_DIR.

    Every sub-directory of SHORTLIST_DIR is scanned (non-directories are
    skipped) and each entry whose lower-cased suffix is in AUDIO_EXT
    becomes a dict with:
      "file" - the entry's file name
      "norm" - normalize() applied to the name without its suffix
    """
    return [
        {
            "file": entry.name,
            "norm": normalize(entry.stem),
        }
        for folder in SHORTLIST_DIR.iterdir()
        if folder.is_dir()
        for entry in folder.iterdir()
        if entry.suffix.lower() in AUDIO_EXT
    ]

# ============================================================
# DRY RUN SCAN
# ============================================================

# Directory levels between a year folder and its audio files: month/day/genre.
_LEVELS_BELOW_YEAR = 3


def _try_cwd(ftp, name):
    """Change into *name*; return False when the server rejects the command."""
    try:
        ftp.cwd(name)
    except ftplib.Error:
        # Typically the entry is a plain file or is not accessible.
        # Socket-level failures are deliberately not caught here: the
        # connection is unusable at that point and the scan should stop.
        return False
    return True


def _list_entries(ftp):
    """Return the names in the current FTP directory, without "." and ".."."""
    try:
        names = ftp.nlst()
    except ftplib.error_perm:
        # Some servers answer NLST on an empty directory with a 5xx reply;
        # treat that as "nothing here" rather than aborting the whole scan.
        return []
    # Following "." or ".." would walk outside the expected tree and leave
    # the working directory out of step with the cwd("..") calls.
    return [entry for entry in names if entry not in (".", "..")]


def _iter_audio_files(ftp, parts, levels):
    """
    Yield (relative_path, file_name) for audio files *levels* dirs below cwd.

    *parts* holds the path components walked so far and is used to build
    the relative path. Each directory that is entered is left again with
    cwd("..") once its contents have been yielded.
    """
    for name in _list_entries(ftp):
        if levels == 0:
            if name.lower().endswith(AUDIO_EXT):
                yield "/".join([*parts, name]), name
            continue

        if not _try_cwd(ftp, name):
            continue
        yield from _iter_audio_files(ftp, [*parts, name], levels - 1)
        ftp.cwd("..")


def dry_run_scan():
    """
    Report which FTP files look like tracks from the local shortlist.

    Walks FTP_BASE/<year>/<month>/<day>/<genre>/ for every year in YEARS,
    normalizes each audio file name and compares it with every shortlist
    entry. Pairs scoring at least MATCH_THRESHOLD are printed. Nothing is
    downloaded.

    Returns the list of matches, each a dict with the keys "shortlist",
    "ftp_path" and "score".
    """
    print("🧪 DRY RUN — NO DOWNLOADS WILL OCCUR")

    shortlist = load_shortlist()
    print(f"🔍 Shortlist tracks loaded: {len(shortlist)}")

    matches = []

    # The context manager sends QUIT / closes the socket even when the scan
    # is interrupted by an exception.
    with ftplib.FTP() as ftp:
        ftp.connect(FTP_HOST, FTP_PORT, timeout=30)
        ftp.login(FTP_USER, FTP_PASS)
        ftp.set_pasv(True)

        # enter jailed root
        ftp.cwd(FTP_BASE)

        for year in YEARS:
            if not _try_cwd(ftp, year):
                continue

            for ftp_path, file_name in _iter_audio_files(
                ftp, [year], _LEVELS_BELOW_YEAR
            ):
                ftp_norm = normalize(os.path.splitext(file_name)[0])
                if not ftp_norm:
                    # Two empty titles compare as identical (ratio 1.0),
                    # which would report a bogus match.
                    continue

                for s in shortlist:
                    score = similarity(s["norm"], ftp_norm)
                    if score < MATCH_THRESHOLD:
                        continue

                    matches.append({
                        "shortlist": s["file"],
                        "ftp_path": ftp_path,
                        "score": round(score, 3),
                    })

                    print("✅ MATCH FOUND")
                    print(f"   Shortlist : {s['file']}")
                    print(f"   FTP       : {ftp_path}")
                    print(f"   Score     : {round(score,3)}\n")

            ftp.cwd("..")

    print("=" * 40)
    print(f"🎯 TOTAL MATCHES FOUND: {len(matches)}")
    print("=" * 40)

    return matches

# ============================================================
# MAIN
# ============================================================

# Run the dry-run scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    dry_run_scan()
