#!/usr/bin/env python3
"""
validate.py — Validate LinkinPark.xml before building .als

Checks all known failure modes based on hard-won experience.
Run automatically by build.sh, or manually: python3 validate.py

Exit code 0 = valid, 1 = errors found.
"""

import re, sys

XML_PATH = "LinkinPark Project/LinkinPark.xml"

# Findings are collected here and printed together in the final summary.
errors = []
warnings = []


def err(msg):
    """Record *msg* as a validation error (stored with a leading ✗ marker)."""
    errors.append("  ✗ {}".format(msg))


def warn(msg):
    """Record *msg* as a non-fatal warning (stored with a leading ⚠ marker)."""
    warnings.append("  ⚠ {}".format(msg))


def ok(msg):
    """Print *msg* immediately as a passed check (with a leading ✓ marker)."""
    print("  ✓ {}".format(msg))

# ── Load ─────────────────────────────────────────────────────────────────────

# Read the whole document as text; every check below works on this string.
# An unreadable or undecodable file is reported as a validation failure
# (exit code 1) rather than surfacing as an uncaught traceback.
try:
    with open(XML_PATH, "r", encoding="utf-8") as f:
        content = f.read()
except (OSError, UnicodeDecodeError) as e:
    print(f"\nFATAL — cannot read {XML_PATH}: {e}")
    sys.exit(1)

# ── 1. Well-formed XML ────────────────────────────────────────────────────────

from xml.etree import ElementTree as ET

# The parsed tree itself is discarded: this step only proves the document
# parses. The remaining checks scan the raw text, so a parse failure aborts
# the run immediately.
try:
    ET.fromstring(content.encode("utf-8"))
except ET.ParseError as parse_exc:
    err(f"XML parse error: {parse_exc}")
    print("\nFATAL — fix XML first:\n" + "\n".join(errors))
    sys.exit(1)
else:
    ok("Well-formed XML")

# ── 2. Unescaped & in attribute values ───────────────────────────────────────

# An '&' inside a Value="..." attribute is acceptable only when it starts one
# of the five predefined entities or a numeric character reference
# (&#10; / &#x0A;). Numeric references are legal XML, so they are whitelisted
# here to avoid false positives.
bad_amp = re.findall(
    r'Value="[^"]*&(?!amp;|lt;|gt;|quot;|apos;|#[0-9]+;|#x[0-9A-Fa-f]+;)[^"]*"',
    content)
if bad_amp:
    # Show only the first few offenders, each truncated to 80 characters.
    for b in bad_amp[:3]:
        err(f"Unescaped & in: {b[:80]}")
    # Make the truncation visible instead of silently dropping the rest.
    if len(bad_amp) > 3:
        err(f"  ...and {len(bad_amp)-3} more")
else:
    ok("No unescaped & in attribute values")

# ── 3. Slot count consistency ─────────────────────────────────────────────────

# Number of <Scene Id="..."> elements; every ClipSlotList found inside an
# AudioTrack or GroupTrack is expected to hold exactly this many ClipSlots.
scene_count = sum(1 for _ in re.finditer(r'<Scene Id="\d+"', content))
slot_counts = {}

track_body_re = re.compile(r'<(AudioTrack|GroupTrack) Id="(\d+)"(.*?)</\1>', re.DOTALL)
for track in track_body_re.finditer(content):
    kind, tid, body = track.groups()
    # One entry per ClipSlotList in this track: how many ClipSlots it contains.
    counts = [
        slot_list.count('<ClipSlot Id=')
        for slot_list in re.findall(r'<ClipSlotList>(.*?)</ClipSlotList>', body, re.DOTALL)
    ]
    slot_counts[f"{kind} {tid}"] = counts
    if not all(c == scene_count for c in counts):
        err(f"Slot count mismatch on {kind} {tid}: {counts} (expected {scene_count} per list)")

# Report success only when no mismatch message was recorded.
if all("Slot count mismatch" not in e for e in errors):
    ok(f"All ClipSlotLists have {scene_count} slots")

# ── 4. GroupTrack ordering (must precede children) ────────────────────────────

# Document-order list of every AudioTrack / GroupTrack opening tag.
track_matches = list(re.finditer(r'<(AudioTrack|GroupTrack) Id="(\d+)"', content))
track_order = [(m.group(1), int(m.group(2)), i) for i, m in enumerate(track_matches)]
group_positions = {  # group_id -> position index
    tid: pos for kind, tid, pos in track_order if kind == "GroupTrack"
}

# Only integer values are accepted, so a malformed TrackGroupId is reported
# below instead of crashing int().
track_group_re = re.compile(r'<TrackGroupId Value="(-?\d+)"')

for (kind, tid, pos), m in zip(track_order, track_matches):
    if kind != "AudioTrack":
        continue
    # Search inside this track's own element, starting from the offset of the
    # tag that was actually matched. A fixed-size window could miss the
    # TrackGroupId and let the check pass vacuously; re-locating the tag by Id
    # could land on a different track when Ids repeat.
    # NOTE(review): assumes the first <TrackGroupId> inside the element is the
    # track's own — confirm against the file format.
    t_end = content.find('</AudioTrack>', m.end())
    if t_end == -1:
        t_end = len(content)
    tg_m = track_group_re.search(content, m.end(), t_end)
    if tg_m is None:
        warn(f"AudioTrack {tid}: no numeric TrackGroupId found — group ordering not checked")
        continue
    gid = int(tg_m.group(1))
    # -1 means "not in a group"; ids with no matching GroupTrack are ignored.
    if gid != -1 and gid in group_positions and group_positions[gid] > pos:
        err(f"AudioTrack {tid} appears before its GroupTrack {gid}")

if not any("appears before" in e for e in errors):
    ok("GroupTrack ordering correct (each group precedes children)")

# ── 5. No duplicate Scene IDs ─────────────────────────────────────────────────

# Scene ids in document order (kept as strings; reused by later checks).
scene_ids = re.findall(r'<Scene Id="(\d+)"', content)
seen = set()
for scene_id in scene_ids:
    if scene_id not in seen:
        seen.add(scene_id)
        continue
    # Each repeat after the first occurrence is reported individually.
    err(f"Duplicate Scene Id={scene_id}")

if all("Duplicate Scene" not in e for e in errors):
    ok(f"No duplicate Scene IDs ({len(scene_ids)} scenes)")

# ── 6. NextPointeeId exceeds every used Id ───────────────────────────────────

# NextPointeeId must be strictly greater than every Id already used by the
# element kinds listed in the pattern below.
npi_m = re.search(r'NextPointeeId Value="(\d+)"', content)
if npi_m is None:
    err("NextPointeeId not found")
else:
    npi = int(npi_m.group(1))
    all_ids = [int(x) for x in re.findall(
        r'(?:AutomationTarget|ModulationTarget|Pointee|VolumeModulationTarget'
        r'|TranspositionModulationTarget|AudioClip) Id="(\d+)"', content)]
    max_id = max(all_ids, default=None)
    if max_id is None:
        # Nothing to compare against: say so rather than reporting nothing.
        warn(f"NextPointeeId={npi}: no pointee Ids found to compare against")
    elif max_id >= npi:
        err(f"NextPointeeId={npi} is not > max used Id={max_id}")
    else:
        ok(f"NextPointeeId={npi} is valid (max used={max_id})")

# ── 6b. Device element ID uniqueness (non-unique list ids crash) ─────────────

# Alternation of the device element names whose Id attributes are compared
# against each other across the whole document.
device_tags_re = '(?:' + '|'.join((
    'Eq8', 'Compressor2', 'Saturator', 'MultibandDynamics', 'Delay', 'Reverb',
    'AudioEffectGroupDevice', 'AutoFilter', 'Gate', 'StereoGain',
)) + ')'
device_ids = re.findall('<' + device_tags_re + r'\s[^>]*Id="(\d+)"', content)

# Tally how often each Id string occurs.
device_id_counts = {}
for device_id in device_ids:
    if device_id in device_id_counts:
        device_id_counts[device_id] += 1
    else:
        device_id_counts[device_id] = 1

dup_device_ids = {did: n for did, n in device_id_counts.items() if n > 1}
if not dup_device_ids:
    ok(f"All {len(device_ids)} device element IDs are unique")
else:
    # Report at most five duplicates, lowest numeric Id first.
    lowest_first = sorted(dup_device_ids.items(), key=lambda pair: int(pair[0]))
    for did, cnt in lowest_first[:5]:
        err(f"Duplicate device Id={did} ({cnt}x) — causes 'non-unique list ids'")

# ── 6c. No duplicate IDs within list elements (non-unique list ids crash) ────

# (container element, child element) pairs: within one container instance the
# child Id values must not repeat.
list_checks = [
    ('Sends', 'TrackSendHolder'),
    ('ClipSlotList', 'ClipSlot'),
    ('Slots', 'GroupTrackSlot'),
]
list_dupes = 0
for parent_tag, child_tag in list_checks:
    section_re = re.compile(f'<{parent_tag}>(.*?)</{parent_tag}>', re.DOTALL)
    child_id_re = re.compile(f'<{child_tag} Id="(\\d+)"')
    for section in section_re.finditer(content):
        ids = child_id_re.findall(section.group(1))
        if len(set(ids)) == len(ids):
            continue
        dupes = [x for x in set(ids) if ids.count(x) > 1]
        err(f"Duplicate {child_tag} IDs {dupes} in a {parent_tag} section")
        list_dupes += 1

if not list_dupes:
    ok("No duplicate IDs within list elements (Sends, ClipSlots, Slots)")

# ── 7. All stem FLAC files exist ─────────────────────────────────────────────

import os
# Every RelativePath attribute value ending in ".flac", still XML-escaped.
rel_paths = [
    hit.group(1)
    for hit in re.finditer(r'<RelativePath Value="([^"]+\.flac)"', content)
]
# Unescape XML entities in paths (e.g. &amp; → &) before checking filesystem
def unescape_xml(s):
    """Decode XML entity and character references in *s*.

    Handles the five predefined entities (``&amp;`` ``&lt;`` ``&gt;``
    ``&quot;`` ``&apos;``) and decimal / hexadecimal character references
    (``&#233;``, ``&#xE9;``).

    Decoding happens in a single pass, so text produced by one replacement
    is never decoded again: ``&amp;lt;`` becomes the literal ``&lt;``,
    not ``<``.

    Returns the decoded string; references that are not recognised, or
    whose code point is out of range, are left untouched.
    """
    named = {'amp': '&', 'apos': "'", 'lt': '<', 'gt': '>', 'quot': '"'}

    def _decode(ref):
        body = ref.group(1)
        if body in named:
            return named[body]
        # Numeric reference: "#NNN" (decimal) or "#xHHH" (hexadecimal).
        try:
            code = int(body[2:], 16) if body[1] == 'x' else int(body[1:])
            return chr(code)
        except (ValueError, OverflowError):
            return ref.group(0)

    return re.sub(r'&(amp|apos|lt|gt|quot|#[0-9]+|#x[0-9A-Fa-f]+);', _decode, s)

# Paths are resolved against the project folder after XML-unescaping; the
# escaped form from the document is what gets reported.
missing = []
for rel in rel_paths:
    if not os.path.exists("LinkinPark Project/" + unescape_xml(rel)):
        missing.append(rel)

if not missing:
    ok(f"All {len(rel_paths)} FLAC files present")
else:
    for p in missing[:5]:
        err(f"Missing FLAC: {p}")
    # Anything past the first five is summarised in one extra entry.
    if len(missing) > 5:
        err(f"  ...and {len(missing)-5} more")

# ── 8. Scene IDs are sequential 0..N ─────────────────────────────────────────

# Scene ids, in document order, must read exactly 0, 1, 2, ... with no gaps.
scene_ids_int = list(map(int, scene_ids))
expected_seq = list(range(len(scene_ids)))
if scene_ids_int == expected_seq:
    ok(f"Scene IDs are sequential 0..{len(scene_ids)-1}")
else:
    err(f"Scene IDs are not sequential 0..N: {scene_ids_int}")

# ── 9. GroupTrack <Slots> count matches scene count ───────────────────────────

group_ids = re.findall(r'<GroupTrack Id="(\d+)"', content)
group_close = '</GroupTrack>'
for gid in group_ids:
    # Text of this GroupTrack, from its opening tag through its closing tag.
    start = content.find(f'<GroupTrack Id="{gid}"')
    stop = content.find(group_close, start) + len(group_close)
    gt_body = content[start:stop]

    if '<Slots>' in gt_body:
        slot_count = gt_body.count('<GroupTrackSlot Id=')
        if slot_count != scene_count:
            err(f"GroupTrack {gid} has {slot_count} GroupTrackSlots (expected {scene_count})")
    else:
        # Without a <Slots> section there is nothing to count.
        err(f"GroupTrack {gid} missing <Slots> section")

slot_problem_found = any(
    "GroupTrack" in e and ("missing" in e or "GroupTrackSlots" in e)
    for e in errors
)
if not slot_problem_found:
    ok(f"All GroupTracks have {scene_count} GroupTrackSlots")

# ── 10. GroupTrack Mixer: Volume after SplitStereoPanR ────────────────────────

# Required order is Pan → SplitStereoPanL → SplitStereoPanR → Volume, so the
# first <Volume> in the GroupTrack must come after BOTH split-pan elements.
# Comparing against SplitStereoPanL alone would accept a <Volume> placed
# between the two.
for gid in group_ids:
    t_start = content.find(f'<GroupTrack Id="{gid}"')
    t_end   = content.find('</GroupTrack>', t_start) + len('</GroupTrack>')
    gt_body = content[t_start:t_end]

    vol_pos = gt_body.find('<Volume>')
    if vol_pos == -1:
        continue
    for split_tag in ('<SplitStereoPanL>', '<SplitStereoPanR>'):
        split_pos = gt_body.find(split_tag)
        if split_pos != -1 and vol_pos < split_pos:
            err(f"GroupTrack {gid} Mixer: <Volume> appears before {split_tag} (wrong order)")
            break  # one report per GroupTrack is enough

if not any("GroupTrack" in e and "wrong order" in e for e in errors):
    ok("GroupTrack Mixer Volume order correct (Pan → SplitStereoPanL → SplitStereoPanR → Volume)")

# ── 11. Scene BPMs are set ────────────────────────────────────────────────────

# Restrict the scan to the <Scenes> ... </Scenes> span. If either tag is
# missing, use an empty section rather than slicing with a -1 index.
scenes_start = content.find('<Scenes>')
scenes_end   = content.find('</Scenes>')
if scenes_start == -1 or scenes_end == -1:
    scenes_section = ""
else:
    scenes_section = content[scenes_start:scenes_end]

no_bpm = []
for m in re.finditer(r'<Scene Id="(\d+)".*?<Name Value="([^"]*)"', scenes_section, re.DOTALL):
    sid, name = m.group(1), m.group(2)
    if not name:
        continue  # only named scenes are checked
    # Look for a tempo marker inside this scene only. A fixed-size window
    # could pick up the next scene's tempo, or stop short of this one's.
    scene_end = scenes_section.find('</Scene>', m.start())
    if scene_end == -1:
        scene_end = len(scenes_section)
    chunk = scenes_section[m.start():scene_end]
    if '<Tempo Value=' not in chunk and '<Manual Value=' not in chunk:
        no_bpm.append(f"Scene {sid} '{name}'")

if no_bpm:
    for s in no_bpm:
        warn(f"No BPM set for {s}")
else:
    ok("All named scenes have BPM")

# ── 12. AudioTrack output routing to group is correct ────────────────────────

audio_track_re = re.compile(r'<AudioTrack Id="(\d+)"(.*?)</AudioTrack>', re.DOTALL)
for track in audio_track_re.finditer(content):
    tid, body = track.groups()

    # Tracks with no TrackGroupId, or with the value -1, are skipped.
    tgi = re.search(r'<TrackGroupId Value="([^"]+)"', body)
    if tgi is None or tgi.group(1) == '-1':
        continue

    ao = re.search(r'<AudioOutputRouting>(.*?)</AudioOutputRouting>', body, re.DOTALL)
    if ao is None:
        continue

    target = re.search(r'<Target Value="([^"]+)"', ao.group(1))
    if target is None:
        continue

    # Grouped tracks must use the generic group target string.
    if target.group(1) != 'AudioOut/GroupTrack':
        err(f"AudioTrack {tid}: AudioOutputRouting Target should be 'AudioOut/GroupTrack', got '{target.group(1)}'")

if all("AudioOutputRouting" not in e for e in errors):
    ok("AudioTrack group routing uses 'AudioOut/GroupTrack' (not specific group ID)")

# ── Summary ───────────────────────────────────────────────────────────────────

# Final report: warnings first (they never affect the exit code), then either
# the error list with exit status 1 or the success line with exit status 0.
print()
if warnings:
    print("Warnings:", "\n".join(warnings), "", sep="\n")

if not errors:
    print("✓ All checks passed — safe to build")
    sys.exit(0)

print("Errors:")
print("\n".join(errors))
print(f"\n✗ Validation failed ({len(errors)} error(s))")
sys.exit(1)
