diff --git a/modules/globals.py b/modules/globals.py index cac2302..cffae16 100644 --- a/modules/globals.py +++ b/modules/globals.py @@ -36,8 +36,3 @@ fp_ui: Dict[str, bool] = {"face_enhancer": False} camera_input_combobox = None webcam_preview_running = False show_fps = False -mouth_mask = False -show_mouth_mask_box = False -mask_feather_ratio = 8 -mask_down_size = 0.50 -mask_size = 1 diff --git a/modules/processors/frame/face_swapper.py b/modules/processors/frame/face_swapper.py index 0c7d88c..6fd0760 100644 --- a/modules/processors/frame/face_swapper.py +++ b/modules/processors/frame/face_swapper.py @@ -2,49 +2,35 @@ from typing import Any, List import cv2 import insightface import threading -import numpy as np + import modules.globals import modules.processors.frame.core from modules.core import update_status from modules.face_analyser import get_one_face, get_many_faces, default_source_face from modules.typing import Face, Frame -from modules.utilities import ( - conditional_download, - resolve_relative_path, - is_image, - is_video, -) +from modules.utilities import conditional_download, resolve_relative_path, is_image, is_video from modules.cluster_analysis import find_closest_centroid FACE_SWAPPER = None THREAD_LOCK = threading.Lock() -NAME = "DLC.FACE-SWAPPER" +NAME = 'DLC.FACE-SWAPPER' def pre_check() -> bool: - download_directory_path = resolve_relative_path("../models") - conditional_download( - download_directory_path, - [ - "https://huggingface.co/hacksider/deep-live-cam/blob/main/inswapper_128_fp16.onnx" - ], - ) + download_directory_path = resolve_relative_path('../models') + conditional_download(download_directory_path, ['https://huggingface.co/hacksider/deep-live-cam/blob/main/inswapper_128_fp16.onnx']) return True def pre_start() -> bool: if not modules.globals.map_faces and not is_image(modules.globals.source_path): - update_status("Select an image for source path.", NAME) + update_status('Select an image for source path.', NAME) return False - elif not modules.globals.map_faces and not get_one_face( - cv2.imread(modules.globals.source_path) - ): - update_status("No face in source path detected.", NAME) + elif not modules.globals.map_faces and not get_one_face(cv2.imread(modules.globals.source_path)): + update_status('No face in source path detected.', NAME) return False - if not is_image(modules.globals.target_path) and not is_video( - modules.globals.target_path - ): - update_status("Select an image or video for target path.", NAME) + if not is_image(modules.globals.target_path) and not is_video(modules.globals.target_path): + update_status('Select an image or video for target path.', NAME) return False return True @@ -54,48 +40,20 @@ def get_face_swapper() -> Any: with THREAD_LOCK: if FACE_SWAPPER is None: - model_path = resolve_relative_path("../models/inswapper_128_fp16.onnx") - FACE_SWAPPER = insightface.model_zoo.get_model( - model_path, providers=modules.globals.execution_providers - ) + model_path = resolve_relative_path('../models/inswapper_128_fp16.onnx') + FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=modules.globals.execution_providers) return FACE_SWAPPER def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame: - face_swapper = get_face_swapper() - - # Apply the face swap - swapped_frame = face_swapper.get( - temp_frame, target_face, source_face, paste_back=True - ) - - if modules.globals.mouth_mask: - # Create a mask for the target face - face_mask = create_face_mask(target_face, temp_frame) - - # Create the mouth mask - mouth_mask, mouth_cutout, mouth_box, lower_lip_polygon = ( - create_lower_mouth_mask(target_face, temp_frame) - ) - - # Apply the mouth area - swapped_frame = apply_mouth_area( - swapped_frame, mouth_cutout, mouth_box, face_mask, lower_lip_polygon - ) - - if modules.globals.show_mouth_mask_box: - mouth_mask_data = (mouth_mask, mouth_cutout, mouth_box, lower_lip_polygon) - swapped_frame = draw_mouth_mask_visualization( - swapped_frame, target_face, mouth_mask_data - ) - - return swapped_frame + return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True) def process_frame(source_face: Face, temp_frame: Frame) -> Frame: + # Ensure the frame is in RGB format if color correction is enabled if modules.globals.color_correction: temp_frame = cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB) - + if modules.globals.many_faces: many_faces = get_many_faces(temp_frame) if many_faces: @@ -113,44 +71,35 @@ def process_frame_v2(temp_frame: Frame, temp_frame_path: str = "") -> Frame: if modules.globals.many_faces: source_face = default_source_face() for map in modules.globals.souce_target_map: - target_face = map["target"]["face"] + target_face = map['target']['face'] temp_frame = swap_face(source_face, target_face, temp_frame) elif not modules.globals.many_faces: for map in modules.globals.souce_target_map: if "source" in map: - source_face = map["source"]["face"] - target_face = map["target"]["face"] + source_face = map['source']['face'] + target_face = map['target']['face'] temp_frame = swap_face(source_face, target_face, temp_frame) elif is_video(modules.globals.target_path): if modules.globals.many_faces: source_face = default_source_face() for map in modules.globals.souce_target_map: - target_frame = [ - f - for f in map["target_faces_in_frame"] - if f["location"] == temp_frame_path - ] + target_frame = [f for f in map['target_faces_in_frame'] if f['location'] == temp_frame_path] for frame in target_frame: - for target_face in frame["faces"]: + for target_face in frame['faces']: temp_frame = swap_face(source_face, target_face, temp_frame) elif not modules.globals.many_faces: for map in modules.globals.souce_target_map: if "source" in map: - target_frame = [ - f - for f in map["target_faces_in_frame"] - if f["location"] == temp_frame_path - ] - source_face = map["source"]["face"] + target_frame = [f for f in map['target_faces_in_frame'] if f['location'] == temp_frame_path] + source_face = map['source']['face'] for frame in target_frame: - for target_face in frame["faces"]: + for target_face in frame['faces']: temp_frame = swap_face(source_face, target_face, temp_frame) - else: detected_faces = get_many_faces(temp_frame) if modules.globals.many_faces: @@ -161,46 +110,25 @@ def process_frame_v2(temp_frame: Frame, temp_frame_path: str = "") -> Frame: elif not modules.globals.many_faces: if detected_faces: - if len(detected_faces) <= len( - modules.globals.simple_map["target_embeddings"] - ): + if len(detected_faces) <= len(modules.globals.simple_map['target_embeddings']): for detected_face in detected_faces: - closest_centroid_index, _ = find_closest_centroid( - modules.globals.simple_map["target_embeddings"], - detected_face.normed_embedding, - ) + closest_centroid_index, _ = find_closest_centroid(modules.globals.simple_map['target_embeddings'], detected_face.normed_embedding) - temp_frame = swap_face( - modules.globals.simple_map["source_faces"][ - closest_centroid_index - ], - detected_face, - temp_frame, - ) + temp_frame = swap_face(modules.globals.simple_map['source_faces'][closest_centroid_index], detected_face, temp_frame) else: detected_faces_centroids = [] for face in detected_faces: - detected_faces_centroids.append(face.normed_embedding) + detected_faces_centroids.append(face.normed_embedding) i = 0 - for target_embedding in modules.globals.simple_map[ - "target_embeddings" - ]: - closest_centroid_index, _ = find_closest_centroid( - detected_faces_centroids, target_embedding - ) + for target_embedding in modules.globals.simple_map['target_embeddings']: + closest_centroid_index, _ = find_closest_centroid(detected_faces_centroids, target_embedding) - temp_frame = swap_face( - modules.globals.simple_map["source_faces"][i], - detected_faces[closest_centroid_index], - temp_frame, - ) + temp_frame = swap_face(modules.globals.simple_map['source_faces'][i], detected_faces[closest_centroid_index], temp_frame) i += 1 return temp_frame -def process_frames( - source_path: str, temp_frame_paths: List[str], progress: Any = None -) -> None: +def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any = None) -> None: if not modules.globals.map_faces: source_face = get_one_face(cv2.imread(source_path)) for temp_frame_path in temp_frame_paths: @@ -234,9 +162,7 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None: cv2.imwrite(output_path, result) else: if modules.globals.many_faces: - update_status( - "Many faces enabled. Using first source image. Progressing...", NAME - ) + update_status('Many faces enabled. Using first source image. Progressing...', NAME) target_frame = cv2.imread(output_path) result = process_frame_v2(target_frame) cv2.imwrite(output_path, result) @@ -244,367 +170,5 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None: def process_video(source_path: str, temp_frame_paths: List[str]) -> None: if modules.globals.map_faces and modules.globals.many_faces: - update_status( - "Many faces enabled. Using first source image. Progressing...", NAME - ) - modules.processors.frame.core.process_video( - source_path, temp_frame_paths, process_frames - ) - - -def create_lower_mouth_mask( - face: Face, frame: Frame -) -> (np.ndarray, np.ndarray, tuple, np.ndarray): - mask = np.zeros(frame.shape[:2], dtype=np.uint8) - mouth_cutout = None - landmarks = face.landmark_2d_106 - if landmarks is not None: - # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 - lower_lip_order = [ - 65, - 66, - 62, - 70, - 69, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 0, - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 65, - ] - lower_lip_landmarks = landmarks[lower_lip_order].astype( - np.float32 - ) # Use float for precise calculations - - # Calculate the center of the landmarks - center = np.mean(lower_lip_landmarks, axis=0) - - # Expand the landmarks outward - expansion_factor = ( - 1 + modules.globals.mask_down_size - ) # Adjust this for more or less expansion - expanded_landmarks = (lower_lip_landmarks - center) * expansion_factor + center - - # Extend the top lip part - toplip_indices = [ - 20, - 0, - 1, - 2, - 3, - 4, - 5, - ] # Indices for landmarks 2, 65, 66, 62, 70, 69, 18 - toplip_extension = ( - modules.globals.mask_size * 0.5 - ) # Adjust this factor to control the extension - for idx in toplip_indices: - direction = expanded_landmarks[idx] - center - direction = direction / np.linalg.norm(direction) - expanded_landmarks[idx] += direction * toplip_extension - - # Extend the bottom part (chin area) - chin_indices = [ - 11, - 12, - 13, - 14, - 15, - 16, - ] # Indices for landmarks 21, 22, 23, 24, 0, 8 - chin_extension = 2 * 0.2 # Adjust this factor to control the extension - for idx in chin_indices: - expanded_landmarks[idx][1] += ( - expanded_landmarks[idx][1] - center[1] - ) * chin_extension - - # Convert back to integer coordinates - expanded_landmarks = expanded_landmarks.astype(np.int32) - - # Calculate bounding box for the expanded lower mouth - min_x, min_y = np.min(expanded_landmarks, axis=0) - max_x, max_y = np.max(expanded_landmarks, axis=0) - - # Add some padding to the bounding box - padding = int((max_x - min_x) * 0.1) # 10% padding - min_x = max(0, min_x - padding) - min_y = max(0, min_y - padding) - max_x = min(frame.shape[1], max_x + padding) - max_y = min(frame.shape[0], max_y + padding) - - # Ensure the bounding box dimensions are valid - if max_x <= min_x or max_y <= min_y: - if (max_x - min_x) <= 1: - max_x = min_x + 1 - if (max_y - min_y) <= 1: - max_y = min_y + 1 - - # Create the mask - mask_roi = np.zeros((max_y - min_y, max_x - min_x), dtype=np.uint8) - cv2.fillPoly(mask_roi, [expanded_landmarks - [min_x, min_y]], 255) - - # Apply Gaussian blur to soften the mask edges - mask_roi = cv2.GaussianBlur(mask_roi, (15, 15), 5) - - # Place the mask ROI in the full-sized mask - mask[min_y:max_y, min_x:max_x] = mask_roi - - # Extract the masked area from the frame - mouth_cutout = frame[min_y:max_y, min_x:max_x].copy() - - # Return the expanded lower lip polygon in original frame coordinates - lower_lip_polygon = expanded_landmarks - - return mask, mouth_cutout, (min_x, min_y, max_x, max_y), lower_lip_polygon - - -def draw_mouth_mask_visualization( - frame: Frame, face: Face, mouth_mask_data: tuple -) -> Frame: - landmarks = face.landmark_2d_106 - if landmarks is not None and mouth_mask_data is not None: - mask, mouth_cutout, (min_x, min_y, max_x, max_y), lower_lip_polygon = ( - mouth_mask_data - ) - - vis_frame = frame.copy() - - # Ensure coordinates are within frame bounds - height, width = vis_frame.shape[:2] - min_x, min_y = max(0, min_x), max(0, min_y) - max_x, max_y = min(width, max_x), min(height, max_y) - - # Adjust mask to match the region size - mask_region = mask[0 : max_y - min_y, 0 : max_x - min_x] - - # Remove the color mask overlay - # color_mask = cv2.applyColorMap((mask_region * 255).astype(np.uint8), cv2.COLORMAP_JET) - - # Ensure shapes match before blending - vis_region = vis_frame[min_y:max_y, min_x:max_x] - # Remove blending with color_mask - # if vis_region.shape[:2] == color_mask.shape[:2]: - # blended = cv2.addWeighted(vis_region, 0.7, color_mask, 0.3, 0) - # vis_frame[min_y:max_y, min_x:max_x] = blended - - # Draw the lower lip polygon - cv2.polylines(vis_frame, [lower_lip_polygon], True, (0, 255, 0), 2) - - # Remove the red box - # cv2.rectangle(vis_frame, (min_x, min_y), (max_x, max_y), (0, 0, 255), 2) - - # Visualize the feathered mask - feather_amount = max( - 1, - min( - 30, - (max_x - min_x) // modules.globals.mask_feather_ratio, - (max_y - min_y) // modules.globals.mask_feather_ratio, - ), - ) - # Ensure kernel size is odd - kernel_size = 2 * feather_amount + 1 - feathered_mask = cv2.GaussianBlur( - mask_region.astype(float), (kernel_size, kernel_size), 0 - ) - feathered_mask = (feathered_mask / feathered_mask.max() * 255).astype(np.uint8) - # Remove the feathered mask color overlay - # color_feathered_mask = cv2.applyColorMap(feathered_mask, cv2.COLORMAP_VIRIDIS) - - # Ensure shapes match before blending feathered mask - # if vis_region.shape == color_feathered_mask.shape: - # blended_feathered = cv2.addWeighted(vis_region, 0.7, color_feathered_mask, 0.3, 0) - # vis_frame[min_y:max_y, min_x:max_x] = blended_feathered - - # Add labels - cv2.putText( - vis_frame, - "Lower Mouth Mask", - (min_x, min_y - 10), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 1, - ) - cv2.putText( - vis_frame, - "Feathered Mask", - (min_x, max_y + 20), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 1, - ) - - return vis_frame - return frame - - -def apply_mouth_area( - frame: np.ndarray, - mouth_cutout: np.ndarray, - mouth_box: tuple, - face_mask: np.ndarray, - mouth_polygon: np.ndarray, -) -> np.ndarray: - min_x, min_y, max_x, max_y = mouth_box - box_width = max_x - min_x - box_height = max_y - min_y - - if ( - mouth_cutout is None - or box_width is None - or box_height is None - or face_mask is None - or mouth_polygon is None - ): - return frame - - try: - resized_mouth_cutout = cv2.resize(mouth_cutout, (box_width, box_height)) - roi = frame[min_y:max_y, min_x:max_x] - - if roi.shape != resized_mouth_cutout.shape: - resized_mouth_cutout = cv2.resize( - resized_mouth_cutout, (roi.shape[1], roi.shape[0]) - ) - - color_corrected_mouth = apply_color_transfer(resized_mouth_cutout, roi) - - # Use the provided mouth polygon to create the mask - polygon_mask = np.zeros(roi.shape[:2], dtype=np.uint8) - adjusted_polygon = mouth_polygon - [min_x, min_y] - cv2.fillPoly(polygon_mask, [adjusted_polygon], 255) - - # Apply feathering to the polygon mask - feather_amount = min( - 30, - box_width // modules.globals.mask_feather_ratio, - box_height // modules.globals.mask_feather_ratio, - ) - feathered_mask = cv2.GaussianBlur( - polygon_mask.astype(float), (0, 0), feather_amount - ) - feathered_mask = feathered_mask / feathered_mask.max() - - face_mask_roi = face_mask[min_y:max_y, min_x:max_x] - combined_mask = feathered_mask * (face_mask_roi / 255.0) - - combined_mask = combined_mask[:, :, np.newaxis] - blended = ( - color_corrected_mouth * combined_mask + roi * (1 - combined_mask) - ).astype(np.uint8) - - # Apply face mask to blended result - face_mask_3channel = ( - np.repeat(face_mask_roi[:, :, np.newaxis], 3, axis=2) / 255.0 - ) - final_blend = blended * face_mask_3channel + roi * (1 - face_mask_3channel) - - frame[min_y:max_y, min_x:max_x] = final_blend.astype(np.uint8) - except Exception as e: - pass - - return frame - - -def create_face_mask(face: Face, frame: Frame) -> np.ndarray: - mask = np.zeros(frame.shape[:2], dtype=np.uint8) - landmarks = face.landmark_2d_106 - if landmarks is not None: - # Convert landmarks to int32 - landmarks = landmarks.astype(np.int32) - - # Extract facial features - right_side_face = landmarks[0:16] - left_side_face = landmarks[17:32] - right_eye = landmarks[33:42] - right_eye_brow = landmarks[43:51] - left_eye = landmarks[87:96] - left_eye_brow = landmarks[97:105] - - # Calculate forehead extension - right_eyebrow_top = np.min(right_eye_brow[:, 1]) - left_eyebrow_top = np.min(left_eye_brow[:, 1]) - eyebrow_top = min(right_eyebrow_top, left_eyebrow_top) - - face_top = np.min([right_side_face[0, 1], left_side_face[-1, 1]]) - forehead_height = face_top - eyebrow_top - extended_forehead_height = int(forehead_height * 5.0) # Extend by 50% - - # Create forehead points - forehead_left = right_side_face[0].copy() - forehead_right = left_side_face[-1].copy() - forehead_left[1] -= extended_forehead_height - forehead_right[1] -= extended_forehead_height - - # Combine all points to create the face outline - face_outline = np.vstack( - [ - [forehead_left], - right_side_face, - left_side_face[ - ::-1 - ], # Reverse left side to create a continuous outline - [forehead_right], - ] - ) - - # Calculate padding - padding = int( - np.linalg.norm(right_side_face[0] - left_side_face[-1]) * 0.05 - ) # 5% of face width - - # Create a slightly larger convex hull for padding - hull = cv2.convexHull(face_outline) - hull_padded = [] - for point in hull: - x, y = point[0] - center = np.mean(face_outline, axis=0) - direction = np.array([x, y]) - center - direction = direction / np.linalg.norm(direction) - padded_point = np.array([x, y]) + direction * padding - hull_padded.append(padded_point) - - hull_padded = np.array(hull_padded, dtype=np.int32) - - # Fill the padded convex hull - cv2.fillConvexPoly(mask, hull_padded, 255) - - # Smooth the mask edges - mask = cv2.GaussianBlur(mask, (5, 5), 3) - - return mask - - -def apply_color_transfer(source, target): - """ - Apply color transfer from target to source image - """ - source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32") - target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32") - - source_mean, source_std = cv2.meanStdDev(source) - target_mean, target_std = cv2.meanStdDev(target) - - # Reshape mean and std to be broadcastable - source_mean = source_mean.reshape(1, 1, 3) - source_std = source_std.reshape(1, 1, 3) - target_mean = target_mean.reshape(1, 1, 3) - target_std = target_std.reshape(1, 1, 3) - - # Perform the color transfer - source = (source - source_mean) * (target_std / source_std) + target_mean - - return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR) + update_status('Many faces enabled. Using first source image. Progressing...', NAME) + modules.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames) diff --git a/modules/ui.py b/modules/ui.py index 98c3234..bbfebf1 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -269,28 +269,6 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C ) show_fps_switch.place(relx=0.6, rely=0.75) - mouth_mask_var = ctk.BooleanVar(value=modules.globals.mouth_mask) - mouth_mask_switch = ctk.CTkSwitch( - root, - text="Mouth Mask", - variable=mouth_mask_var, - cursor="hand2", - command=lambda: setattr(modules.globals, "mouth_mask", mouth_mask_var.get()), - ) - mouth_mask_switch.place(relx=0.1, rely=0.55) - - show_mouth_mask_box_var = ctk.BooleanVar(value=modules.globals.show_mouth_mask_box) - show_mouth_mask_box_switch = ctk.CTkSwitch( - root, - text="Show Mouth Mask Box", - variable=show_mouth_mask_box_var, - cursor="hand2", - command=lambda: setattr( - modules.globals, "show_mouth_mask_box", show_mouth_mask_box_var.get() - ), - ) - show_mouth_mask_box_switch.place(relx=0.6, rely=0.55) - start_button = ctk.CTkButton( root, text="Start", cursor="hand2", command=lambda: analyze_target(start, root) ) diff --git a/switch_states.json b/switch_states.json deleted file mode 100644 index 625cf3e..0000000 --- a/switch_states.json +++ /dev/null @@ -1 +0,0 @@ -{"keep_fps": false, "keep_audio": false, "keep_frames": false, "many_faces": false, "map_faces": false, "color_correction": false, "nsfw_filter": false, "live_mirror": false, "live_resizable": true, "fp_ui": {"face_enhancer": false}, "show_fps": false} \ No newline at end of file