import streamlit as st
from pathlib import Path
import tempfile
import zipfile
import shutil
import ftplib
import os
import re
import time
import unicodedata
from collections import defaultdict

# ============================================================
# PAGE CONFIG
# ============================================================

# Page metadata: tab title and a centered column layout.
st.set_page_config(layout="centered", page_title="DJ AI Downloader (Ludo FINAL)")

# Visible heading at the top of the page.
st.title("⬇️ DJ AI Downloader (Ludo FINAL)")

# ============================================================
# FTP CONFIG
# ============================================================

# Connection settings. Each value can be overridden through an environment
# variable so the app can be deployed without editing this file; the literals
# below are only fallbacks that keep the previous behaviour.
# NOTE(security): a real password is committed here as a fallback. It should be
# rotated and supplied via DJ_FTP_PASS instead, then removed from the source.
FTP_HOST = os.environ.get("DJ_FTP_HOST", "ftp.djdownload.me")
FTP_USER = os.environ.get("DJ_FTP_USER", "ludovic.m.santos")
FTP_PASS = os.environ.get("DJ_FTP_PASS", "3af32ssH")
FTP_PORT = int(os.environ.get("DJ_FTP_PORT", "21"))
FTP_BASE = os.environ.get("DJ_FTP_BASE", "/home/ftp.djdownload.me")

# File extensions (lower-case, with dot) that count as audio tracks.
AUDIO_EXT = (".mp3", ".wav")

# Minimum token_similarity() score for an FTP file to match a shortlist track.
MATCH_THRESHOLD = 0.92
# Seconds to sleep after each successful download.
DOWNLOAD_DELAY = 3
# Timeout in seconds passed to ftplib.FTP.
FTP_TIMEOUT = 30

# Tokens dropped by normalize() before names are compared.
IGNORE_WORDS = {
    "mix", "original", "extended", "edit", "club",
    "radio", "dub", "instrumental", "version", "remix",
}

# ============================================================
# SESSION SAFE FOLDERS
# ============================================================

# Per-session defaults. Each factory runs only when its key is missing, so the
# temporary folders are created once per session and reused on later reruns.
_SESSION_DEFAULTS = (
    ("shortlist_dir", lambda: Path(tempfile.mkdtemp(prefix="shortlist_"))),
    ("download_dir", lambda: Path(tempfile.mkdtemp(prefix="downloads_"))),
    ("zip_path", lambda: None),
    ("stop", lambda: False),
)

for _key, _factory in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _factory()

# Module-level aliases used by the rest of the script.
SHORTLIST_DIR = st.session_state["shortlist_dir"]
DOWNLOAD_ROOT = st.session_state["download_dir"]

# ============================================================
# HELPERS
# ============================================================

def normalize(text):
    """Reduce a track name to a lower-case, space-separated token string.

    Steps: NFKD-decompose and drop every non-ASCII character, lower-case,
    remove ``(...)`` and ``[...]`` groups, turn each run of characters outside
    ``a-z0-9`` into one space, then discard tokens listed in IGNORE_WORDS.
    """
    cleaned = (
        unicodedata.normalize("NFKD", text)
        .encode("ascii", "ignore")
        .decode("ascii")
        .lower()
    )

    # Applied in order: parenthesised groups, bracketed groups, then separators.
    for pattern, replacement in (
        (r"\(.*?\)", ""),
        (r"\[.*?\]", ""),
        (r"[^a-z0-9]+", " "),
    ):
        cleaned = re.sub(pattern, replacement, cleaned)

    kept = filter(lambda token: token not in IGNORE_WORDS, cleaned.split())
    return " ".join(kept)

def token_similarity(a, b):
    """Return the share of ``a``'s distinct tokens that also appear in ``b``.

    Both arguments are whitespace-separated token strings. The score is
    relative to ``a`` only, so extra tokens in ``b`` do not lower it. Returns
    0.0 when either side has no tokens.
    """
    wanted = set(a.split())
    candidate = set(b.split())

    if wanted and candidate:
        return len(wanted.intersection(candidate)) / len(wanted)
    return 0.0

# ============================================================
# AUTO DETECT YEARS
# ============================================================

def get_years(ftp):
    """Return the sorted entries of the current FTP directory ending in "-DJdownload".

    ``ftp`` only needs an ``nlst()`` method that returns the entry names.
    """
    return sorted(
        entry for entry in ftp.nlst() if entry.endswith("-DJdownload")
    )

# ============================================================
# LOAD SHORTLIST
# ============================================================

def load_shortlist():
    """Index the extracted shortlist by genre folder.

    Every sub-directory of SHORTLIST_DIR is treated as a genre. Each entry in
    it whose suffix (case-insensitive) is in AUDIO_EXT becomes a dict with the
    original file name (``"file"``) and its normalized stem (``"norm"``).
    Genres without any audio entry do not appear in the result.

    Returns:
        defaultdict(list) mapping genre folder name -> list of track dicts.
    """
    by_genre = defaultdict(list)

    genre_dirs = (entry for entry in SHORTLIST_DIR.iterdir() if entry.is_dir())

    for genre_dir in genre_dirs:
        for track in genre_dir.iterdir():
            if track.suffix.lower() in AUDIO_EXT:
                by_genre[genre_dir.name].append(
                    {"file": track.name, "norm": normalize(track.stem)}
                )

    return by_genre

# ============================================================
# UI — UPLOAD ZIP
# ============================================================

st.subheader("📦 Upload shortlist ZIP")

uploaded_zip = st.file_uploader(
    "Upload shortlist.zip (from discovery app)",
    type=["zip"]
)

if uploaded_zip:

    try:
        with zipfile.ZipFile(uploaded_zip) as z:
            # Wipe the previous shortlist only once the archive has opened
            # cleanly, so a bad upload does not destroy the existing one.
            shutil.rmtree(SHORTLIST_DIR, ignore_errors=True)
            SHORTLIST_DIR.mkdir(parents=True, exist_ok=True)
            z.extractall(SHORTLIST_DIR)
    except zipfile.BadZipFile:
        # Report a corrupt or non-ZIP upload instead of crashing the page.
        st.error("The uploaded file is not a valid ZIP archive")
    else:
        # Force the results ZIP to be rebuilt for the new shortlist.
        st.session_state.zip_path = None

        st.success("Shortlist loaded!")

        for g in sorted(SHORTLIST_DIR.iterdir()):
            if g.is_dir():
                # Count only audio files, the same filter load_shortlist()
                # uses, so the number shown matches what will be searched.
                count = sum(
                    1
                    for f in g.iterdir()
                    if f.is_file() and f.suffix.lower() in AUDIO_EXT
                )
                st.write(f"🎵 {g.name} → {count} tracks")

st.divider()

# ============================================================
# STOP BUTTON
# ============================================================

# Clicking the button raises the session's stop flag, which run_download() polls.
stop_clicked = st.button("🛑 Stop download")

if stop_clicked:
    st.session_state["stop"] = True

# ============================================================
# PROGRESS + LOGS
# ============================================================

# Widgets are created in this order: progress bar first, log placeholder second.
progress = st.progress(0)
log_box = st.empty()

# Log lines collected during the current script run.
logs = list()


def log(msg):
    """Record ``msg`` and re-render the log box with the 25 newest lines."""
    logs.append(msg)
    newest = logs[-25:]
    log_box.text("\n".join(newest))

# ============================================================
# MAIN DOWNLOAD
# ============================================================

def _try_cwd(ftp, name):
    """Change into ``name`` on the server; return False if the server refuses."""
    try:
        ftp.cwd(name)
    except ftplib.Error:
        # The server rejected the CWD (for instance the entry is not a
        # directory). Connection-level errors (OSError/EOFError) are not
        # caught here, so a dead connection is not silently skipped.
        return False
    return True


def _list_dir(ftp):
    """Return the names in the current FTP directory without "." and ".."."""
    try:
        names = ftp.nlst()
    except ftplib.error_perm:
        # Treated as an empty listing (some servers reportedly answer NLST on
        # an empty directory with a permanent error).
        return []
    # "." / ".." would break the relative cwd("..") bookkeeping of the walk.
    return [name for name in names if name not in (".", "..")]


def _fetch_file(ftp, fname, out):
    """Download ``fname`` from the current FTP directory to the path ``out``.

    The data is written to a sibling ``.part`` file that is renamed only after
    the transfer has finished, so an interrupted download never leaves a
    truncated file that a later run would take for a completed one.
    Raises whatever ftplib or the file system raises; the partial file is
    removed in that case.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    part = out.with_name(out.name + ".part")
    try:
        with open(part, "wb") as fh:
            ftp.retrbinary(f"RETR {fname}", fh.write)
        part.replace(out)
    finally:
        # After a successful rename the .part file no longer exists.
        if part.exists():
            part.unlink()


def _sync_genre_dir(ftp, target_dir, shortlist_tracks, state):
    """Fetch every audio file in the current FTP directory that matches a track.

    ``state`` is updated in place: ``"downloaded"`` (count of fetched files),
    ``"ftp_norms"`` (normalized FTP names already handled) and ``"matched"``
    (normalized shortlist names that have been satisfied).
    """
    for fname in _list_dir(ftp):

        if st.session_state.stop:
            break

        if not fname.lower().endswith(AUDIO_EXT):
            continue

        ftp_norm = normalize(os.path.splitext(fname)[0])

        if ftp_norm in state["ftp_norms"]:
            continue

        # First shortlist track whose tokens are covered by this file name.
        match = next(
            (
                s for s in shortlist_tracks
                if token_similarity(s["norm"], ftp_norm) >= MATCH_THRESHOLD
            ),
            None,
        )
        if match is None:
            continue

        out = target_dir / fname

        if not out.exists():
            log(f"⬇️ {fname}")
            try:
                _fetch_file(ftp, fname, out)
            except ftplib.all_errors as exc:
                # Best effort: report and move on. The shortlist track stays
                # unmatched unless another copy is fetched later.
                log(f"⚠️ Failed: {fname} ({exc})")
                continue
            state["downloaded"] += 1
            time.sleep(DOWNLOAD_DELAY)

        state["ftp_norms"].add(ftp_norm)
        # Record the shortlist name too. The fuzzy match means it can differ
        # from the FTP name, and the final report is keyed on shortlist names.
        state["matched"].add(match["norm"])


def _sync_year(ftp, year, shortlist, state):
    """Walk the sub-directories of the current (year) directory.

    Three levels are traversed below the year: two generic directory levels
    (called month and day in this code) and then one directory per shortlist
    genre, with "_" in the local genre name replaced by a space.
    """
    year_root = DOWNLOAD_ROOT / year.replace("-DJdownload", "")

    for month in _list_dir(ftp):
        if st.session_state.stop:
            break
        if not _try_cwd(ftp, month):
            continue

        for day in _list_dir(ftp):
            if st.session_state.stop:
                break
            if not _try_cwd(ftp, day):
                continue

            for local_genre, shortlist_tracks in shortlist.items():
                ftp_genre = local_genre.replace("_", " ")
                if not _try_cwd(ftp, ftp_genre):
                    continue

                _sync_genre_dir(
                    ftp, year_root / ftp_genre, shortlist_tracks, state
                )
                ftp.cwd("..")

            ftp.cwd("..")
        ftp.cwd("..")


def run_download():
    """Download every shortlist track that can be found on the FTP server.

    Loads the shortlist, logs in, walks each "*-DJdownload" year directory and
    stores matching audio files under DOWNLOAD_ROOT/<year>/<genre>. Progress
    and results are reported through ``progress`` and ``log``. Shortlist tracks
    for which no file was fetched (or already present) are listed as missing.
    FTP and file-system errors abort the walk but still produce the summary.
    """
    shortlist = load_shortlist()

    if not shortlist:
        st.error("Shortlist is empty")
        return

    state = {"downloaded": 0, "ftp_norms": set(), "matched": set()}

    try:
        # The context manager closes the connection even when the walk fails.
        with ftplib.FTP(timeout=FTP_TIMEOUT) as ftp:
            ftp.connect(FTP_HOST, FTP_PORT)
            ftp.login(FTP_USER, FTP_PASS)
            ftp.set_pasv(True)
            ftp.cwd(FTP_BASE)

            years = get_years(ftp)
            log(f"📂 Found years: {', '.join(years)}")

            for year_index, year in enumerate(years, start=1):

                if st.session_state.stop:
                    break

                progress.progress(year_index / len(years))

                if not _try_cwd(ftp, year):
                    continue

                log(f"📅 Checking {year}")
                _sync_year(ftp, year, shortlist, state)
                ftp.cwd("..")

    except ftplib.all_errors as exc:
        st.error(f"Download aborted: {exc}")
        log(f"⚠️ Download aborted: {exc}")

    if state["downloaded"]:
        # A previously built results ZIP no longer reflects the folder.
        st.session_state.zip_path = None

    # A shortlist track is missing when no file was ever matched to it.
    missing = []
    for genre, tracks in shortlist.items():
        for t in tracks:
            if t["norm"] not in state["matched"]:
                missing.append(f"{genre} / {t['file']}")

    log(f"✅ Downloaded: {state['downloaded']}")
    log(f"❌ Missing: {len(missing)}")

    if missing:
        log("----- NOT FOUND -----")
        for m in missing:
            log(m)

# ============================================================
# RUN BUTTON
# ============================================================

start_clicked = st.button("🚀 Start download", use_container_width=True)

if start_clicked:
    # Fresh run: lower the stop flag, empty the log buffer, reset the bar.
    st.session_state["stop"] = False
    del logs[:]
    progress.progress(0)

    with st.spinner("Downloading from DJDownload FTP..."):
        run_download()

# ============================================================
# ZIP BUILD (SAFE)
# ============================================================

def build_zip(root=None):
    """Pack every file below ``root`` into ``root/downloads.zip`` (uncompressed).

    Args:
        root: Directory to archive. Defaults to DOWNLOAD_ROOT.

    Returns:
        Path of the written archive.

    The archive lives inside the directory being walked, so it is skipped
    explicitly; otherwise it would be added to itself as a member.
    """
    root = DOWNLOAD_ROOT if root is None else Path(root)
    zip_path = root / "downloads.zip"

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_STORED) as z:
        # sorted() gives a stable member order for the same folder contents.
        for f in sorted(root.rglob("*")):
            if f.is_file() and f != zip_path:
                z.write(f, arcname=f.relative_to(root).as_posix())

    return zip_path

# ============================================================
# ZIP DOWNLOAD
# ============================================================

st.divider()
st.subheader("📥 Download results")

# Only real downloaded files count. Empty year/genre folders and the results
# archive itself must not make the page offer an empty ZIP.
_results_zip = DOWNLOAD_ROOT / "downloads.zip"
_has_downloads = DOWNLOAD_ROOT.exists() and any(
    p.is_file() and p != _results_zip for p in DOWNLOAD_ROOT.rglob("*")
)

if _has_downloads:

    # Build the archive lazily and reuse it until zip_path is reset to None.
    if st.session_state.zip_path is None:
        with st.spinner("📦 Creating ZIP..."):
            st.session_state.zip_path = build_zip()

    zip_file = st.session_state.zip_path

    if zip_file and zip_file.exists():
        with open(zip_file,"rb") as f:
            st.download_button(
                "⬇️ Download ZIP (year/genre organized)",
                f,
                "downloads.zip",
                mime="application/zip",
                use_container_width=True
            )
else:
    st.info("No downloads yet")

# ============================================================
# CLEAR SESSION
# ============================================================

st.divider()

clear_clicked = st.button("🧹 Clear session")

if clear_clicked:
    # Remove both working folders; errors while deleting are ignored.
    for stale_dir in (SHORTLIST_DIR, DOWNLOAD_ROOT):
        shutil.rmtree(stale_dir, ignore_errors=True)

    # Register fresh temporary folders and drop the cached ZIP path.
    st.session_state["shortlist_dir"] = Path(tempfile.mkdtemp(prefix="shortlist_"))
    st.session_state["download_dir"] = Path(tempfile.mkdtemp(prefix="downloads_"))
    st.session_state["zip_path"] = None

    st.success("Session cleaned")
