#!/usr/bin/env python3 """ validate.py — Validate LinkinPark.xml before building .als Checks all known failure modes based on hard-won experience. Run automatically by build.sh, or manually: python3 validate.py Exit code 0 = valid, 1 = errors found. """ import re, sys XML_PATH = "LinkinPark Project/LinkinPark.xml" errors = [] warnings = [] def err(msg): errors.append(f" ✗ {msg}") def warn(msg): warnings.append(f" ⚠ {msg}") def ok(msg): print(f" ✓ {msg}") # ── Load ───────────────────────────────────────────────────────────────────── with open(XML_PATH, "r", encoding="utf-8") as f: content = f.read() # ── 1. Well-formed XML ──────────────────────────────────────────────────────── from xml.etree import ElementTree as ET try: ET.fromstring(content.encode("utf-8")) ok("Well-formed XML") except ET.ParseError as e: err(f"XML parse error: {e}") # Can't continue if XML is broken print("\nFATAL — fix XML first:\n" + "\n".join(errors)) sys.exit(1) # ── 2. Unescaped & in attribute values ─────────────────────────────────────── bad_amp = re.findall(r'Value="[^"]*&(?!amp;|lt;|gt;|quot;|apos;)[^"]*"', content) if bad_amp: for b in bad_amp[:3]: err(f"Unescaped & in: {b[:80]}") else: ok("No unescaped & in attribute values") # ── 3. Slot count consistency ───────────────────────────────────────────────── scene_count = len(re.findall(r'', content, re.DOTALL): kind, tid, body = m.group(1), m.group(2), m.group(3) slot_lists = re.findall(r'(.*?)', body, re.DOTALL) counts = [len(re.findall(r' pos: err(f"AudioTrack {tid} appears before its GroupTrack {gid}") if not any("appears before" in e for e in errors): ok("GroupTrack ordering correct (each group precedes children)") # ── 5. No duplicate Scene IDs ───────────────────────────────────────────────── scene_ids = re.findall(r'= NextPointeeId all_ids = [int(x) for x in re.findall( r'(?:AutomationTarget|ModulationTarget|Pointee|VolumeModulationTarget' r'|TranspositionModulationTarget|AudioClip) Id="(\d+)"', content)] if all_ids: max_id = max(all_ids) if max_id >= npi: err(f"NextPointeeId={npi} is not > max used Id={max_id}") else: ok(f"NextPointeeId={npi} is valid (max used={max_id})") else: err("NextPointeeId not found") # ── 6b. Device element ID uniqueness (non-unique list ids crash) ───────────── device_tags_re = (r'(?:Eq8|Compressor2|Saturator|MultibandDynamics|Delay|Reverb' r'|AudioEffectGroupDevice|AutoFilter|Gate|StereoGain)') device_ids = re.findall(f'<{device_tags_re}\\s[^>]*Id="(\\d+)"', content) device_id_counts = {} for did in device_ids: device_id_counts[did] = device_id_counts.get(did, 0) + 1 dup_device_ids = {k: v for k, v in device_id_counts.items() if v > 1} if dup_device_ids: for did, cnt in sorted(dup_device_ids.items(), key=lambda x: int(x[0]))[:5]: err(f"Duplicate device Id={did} ({cnt}x) — causes 'non-unique list ids'") else: ok(f"All {len(device_ids)} device element IDs are unique") # ── 6c. No duplicate IDs within list elements (non-unique list ids crash) ──── list_checks = [ ('Sends', 'TrackSendHolder'), ('ClipSlotList', 'ClipSlot'), ('Slots', 'GroupTrackSlot'), ] list_dupes = 0 for parent_tag, child_tag in list_checks: for pm in re.finditer(f'<{parent_tag}>(.*?)', content, re.DOTALL): ids = re.findall(f'<{child_tag} Id="(\\d+)"', pm.group(1)) if len(ids) != len(set(ids)): dupes = [x for x in set(ids) if ids.count(x) > 1] err(f"Duplicate {child_tag} IDs {dupes} in a {parent_tag} section") list_dupes += 1 if list_dupes == 0: ok("No duplicate IDs within list elements (Sends, ClipSlots, Slots)") # ── 7. All stem FLAC files exist ───────────────────────────────────────────── import os rel_paths = re.findall(r'').replace('"', '"') missing = [p for p in rel_paths if not os.path.exists(f"LinkinPark Project/{unescape_xml(p)}")] if missing: for p in missing[:5]: err(f"Missing FLAC: {p}") if len(missing) > 5: err(f" ...and {len(missing)-5} more") else: ok(f"All {len(rel_paths)} FLAC files present") # ── 8. Scene IDs are sequential 0..N ───────────────────────────────────────── scene_ids_int = [int(sid) for sid in scene_ids] expected_seq = list(range(len(scene_ids))) if scene_ids_int != expected_seq: err(f"Scene IDs are not sequential 0..N: {scene_ids_int}") else: ok(f"Scene IDs are sequential 0..{len(scene_ids)-1}") # ── 9. GroupTrack count matches scene count ─────────────────────────── group_ids = re.findall(r'', t_start) + len('') gt_body = content[t_start:t_end] # Check Slots section exists if '' not in gt_body: err(f"GroupTrack {gid} missing section") continue slot_count = len(re.findall(r'', t_start) + len('') gt_body = content[t_start:t_end] vol_pos = gt_body.find('') split_l_pos = gt_body.find('') if vol_pos != -1 and split_l_pos != -1 and vol_pos < split_l_pos: err(f"GroupTrack {gid} Mixer: appears before (wrong order)") if not any("GroupTrack" in e and "wrong order" in e for e in errors): ok("GroupTrack Mixer Volume order correct (Pan → SplitStereoPanL → SplitStereoPanR → Volume)") # ── 11. Scene BPMs are set ──────────────────────────────────────────────────── scenes_section = content[content.find(''):content.find('')] no_bpm = [] for m in re.finditer(r'', content, re.DOTALL): tid, body = m.group(1), m.group(2) # Only check tracks that have a TrackGroupId != -1 tgi = re.search(r'(.*?)', body, re.DOTALL) if not ao: continue target = re.search(r'