new version

2026-05-10 21:05:24 +02:00
parent 01110c0922
commit 7c86482c81
2 changed files with 57 additions and 5 deletions
--- a/config.json
+++ b/config.json
@@ -4,11 +4,11 @@
  "output_dir": "",
  "video_input_dir": "C:/Users/timoh/Desktop/faceswap/Eingabe",
  "video_output_dir": "C:\\Users\\timoh\\Desktop\\faceswap\\Eingabe\\output_videos",
-  "voice_ref": "C:/Users/timoh/Desktop/Swap/hay.mp3",
+  "voice_ref": "C:/Users/timoh/Desktop/faceswap/hay.mp3",
-  "voice_source_audio": "",
+  "voice_source_audio": "C:/Users/timoh/Desktop/faceswap/The_Little_One_Youve_Always_Been_By_PowerfulListen.wav",
  "voice_output": "C:/Users/timoh/Desktop/faceswap/Ausgabe/hay_cloned.wav",
  "voice_language": "en",
-  "voice_mode": "text",
+  "voice_mode": "audio",
  "enhance": true,
  "color": true,
  "voice_text": "Obsession. It's a fun word, a word that makes your body shake, it makes you feel a little bit strange, but good after all. That obsession comes from me; you are obsessed, addicted, owned, and you love it."
--- a/face_swap.py
+++ b/face_swap.py
@@ -1190,9 +1190,61 @@ class VoiceCloner:
        speaker_wav = self._check_audio(speaker_wav, "Referenz-Stimme")
        source_wav  = self._check_audio(source_wav,  "Eingabe-Audio")
        self._ensure_vc()
        import tempfile, math
        try:
            import soundfile as sf
            import numpy as np
        except ImportError:
            raise RuntimeError(
                "Bitte installiere soundfile:\n"
                "  python -m pip install soundfile"
            )
        out_path = Path(out_file)
        out_path.parent.mkdir(parents=True, exist_ok=True)
-        self.vc.voice_conversion_to_file(source_wav=source_wav, target_wav=speaker_wav, file_path=str(out_path))
+    
        # Audiodatei laden und in Segmente aufteilen
        CHUNK_SEC = 30          # Segmentlänge in Sekunden (bei RAM-Problemen kleiner wählen, z.B. 20)
        data, sr = sf.read(source_wav, always_2d=False)
        chunk_samples = int(CHUNK_SEC * sr)
        total_samples = len(data)
        num_chunks = math.ceil(total_samples / chunk_samples)
        if num_chunks <= 1:
            # Kurze Datei: direkt verarbeiten
            self.vc.voice_conversion_to_file(
                source_wav=source_wav, target_wav=speaker_wav, file_path=str(out_path)
            )
            return str(out_path)
        self.log(f"VOICE: Datei zu lang — teile in {num_chunks} Segmente à {CHUNK_SEC}s ...")
        results = []
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(num_chunks):
                start = i * chunk_samples
                end   = min(start + chunk_samples, total_samples)
                chunk = data[start:end]
                chunk_in  = Path(tmpdir) / f"chunk_{i:04d}_in.wav"
                chunk_out = Path(tmpdir) / f"chunk_{i:04d}_out.wav"
                sf.write(str(chunk_in), chunk, sr)
                self.log(f"VOICE: Segment {i+1}/{num_chunks} ...")
                self.vc.voice_conversion_to_file(
                    source_wav=str(chunk_in), target_wav=speaker_wav,
                    file_path=str(chunk_out)
                )
                out_data, out_sr = sf.read(str(chunk_out))
                results.append((out_data, out_sr))
            # Segmente zusammenführen
            self.log("VOICE: Füge Segmente zusammen ...")
            target_sr = results[0][1]
            merged = np.concatenate(
                [r if sr == target_sr else r  # ggf. Resampling hier einfügen
                 for r, sr in results], axis=0
            )
            sf.write(str(out_path), merged, target_sr)
        return str(out_path)