From 83a8e680fa0e06eb1758997a470def254bdb508a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=88=B1=E5=96=9D=E6=B0=B4=E7=9A=84=E6=9C=A8=E5=AD=90?=
 <50564847+ice-a@users.noreply.github.com>
Date: Sat, 10 Jan 2026 20:19:32 +0800
Subject: [PATCH] Add files via upload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

upload:人脸检测代码
---
 drop_duplicates.py | 217 +++++++++++++++++++++++++++++++++++++++++++++
 face_detect.py     | 151 +++++++++++++++++++++++++++++++
 2 files changed, 368 insertions(+)
 create mode 100644 drop_duplicates.py
 create mode 100644 face_detect.py

diff --git a/drop_duplicates.py b/drop_duplicates.py
new file mode 100644
index 0000000..faaca1b
--- /dev/null
+++ b/drop_duplicates.py
@@ -0,0 +1,217 @@
+import os
+import shutil
+import time
+import ctypes
+import imagehash
+from PIL import Image, UnidentifiedImageError
+
+# -------------------------- Configuration (edit as needed) --------------------------
+SOURCE_FOLDER = r"D:\img"                          # Root directory of the original image library
+TARGET_FOLDER = r"D:\img_todo\all_img_result"     # Output directory for the de-duplicated images
+SUPPORTED_FORMATS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif')  # File extensions treated as images
+MIN_PIXEL_COUNT = 1000                              # Minimum side length in pixels; images whose width or height is below this are skipped
+RETRY_TIMES = 5                                     # Total number of attempts for a failing file operation
+RETRY_DELAY = 2                                     # Delay between attempts, in seconds
+HASH_SIZE = 8                                       # hash_size passed to imagehash.phash (author's note: choose 8-16; affects de-duplication precision)
+
+
+# -------------------------- 系统级辅助函数 --------------------------
+def release_file_cache():
+    """On Windows, call SetErrorMode and EmptyWorkingSet for this process (best effort); does nothing on other systems."""
+    try:
+        if os.name == "nt":
+            ctypes.windll.kernel32.SetErrorMode(0x0001)  # 0x0001 is SEM_FAILCRITICALERRORS
+            ctypes.windll.psapi.EmptyWorkingSet(ctypes.windll.kernel32.GetCurrentProcess())  # NOTE(review): this trims the process working set; the author intended it to release file caches/handles -- confirm it has that effect
+    except Exception as e:
+        print(f"⚠️ 释放文件缓存失败: {e}")
+
+
+def remove_readonly_attr(file_path):
+    """Best-effort removal of the read-only state of file_path (no-op if it does not exist); failures are printed, never raised."""
+    if os.path.exists(file_path):
+        try:
+            if os.name == "nt":
+                ctypes.windll.kernel32.SetFileAttributesW(file_path, 128)  # 128 is FILE_ATTRIBUTE_NORMAL, which clears read-only
+            else:
+                os.chmod(file_path, (os.stat(file_path).st_mode & 0o7777) | 0o600)  # add owner read/write to the existing mode instead of forcing world-writable 0o777
+        except Exception as e:
+            print(f"⚠️ 移除只读属性失败 {file_path}: {e}")
+
+
+# -------------------------- 核心图片处理函数 --------------------------
+def get_image_info(img_path):
+    """Return (perceptual-hash string, (width, height)) for img_path, or (None, None) when the image cannot be opened or processed."""
+    img = None
+    try:
+        img = Image.open(img_path)
+        # Perceptual hash computed on the grayscale ('L') conversion of the image
+        phash = str(imagehash.phash(img.convert('L'), hash_size=HASH_SIZE))
+        # Image dimensions as reported by PIL
+        width, height = img.size
+        return phash, (width, height)
+    except UnidentifiedImageError:
+        print(f"❌ 无法识别图片: {os.path.basename(img_path)}")
+        return None, None  # the size slot is None here, not a tuple; check the hash before unpacking it
+    except Exception as e:
+        print(f"❌ 处理图片失败 {os.path.basename(img_path)}: {e}")
+        return None, None  # same failure shape as above
+    finally:
+        if img:
+            img.close()
+            del img
+        release_file_cache()  # runs on every call, success or failure
+
+
+def safe_copy_file(src_path, dst_path):
+    """Copy src_path to dst_path with shutil.copy2, making up to RETRY_TIMES attempts; return True on success, False after the last failure."""
+    remove_readonly_attr(src_path)
+    
+    # Copy the file, retrying on any exception
+    for retry in range(RETRY_TIMES):
+        try:
+            shutil.copy2(src_path, dst_path)
+            return True
+        except Exception as e:
+            if retry < RETRY_TIMES - 1:  # attempts remain: report, wait, try again
+                print(f"⚠️ 复制失败 {os.path.basename(src_path)}，重试中... ({retry+1}/{RETRY_TIMES})")
+                time.sleep(RETRY_DELAY)
+            else:  # last attempt failed: report the error and give up
+                print(f"❌ 复制失败 {os.path.basename(src_path)}: {e}")
+                return False
+
+
+def delete_source_file(src_path):
+    """Delete src_path with up to RETRY_TIMES attempts; return True on success, otherwise append the path to delete_failed.txt in TARGET_FOLDER and return False."""
+    remove_readonly_attr(src_path)
+    
+    # Delete the file, retrying on failure
+    for retry in range(RETRY_TIMES):
+        try:
+            release_file_cache()
+            os.remove(src_path)
+            return True
+        except PermissionError:  # reported to the user as "file is in use"
+            if retry < RETRY_TIMES - 1:
+                print(f"⚠️ 删除失败 {os.path.basename(src_path)}，文件被占用，重试中... ({retry+1}/{RETRY_TIMES})")
+                time.sleep(RETRY_DELAY)
+            else:
+                print(f"❌ 删除失败 {os.path.basename(src_path)}: 文件被占用")
+                with open(os.path.join(TARGET_FOLDER, "delete_failed.txt"), "a", encoding="utf-8") as f:  # record the path that could not be deleted
+                    f.write(f"{src_path}\n")
+                return False
+        except Exception as e:  # any other error: same retry policy, but the exception text is printed
+            if retry < RETRY_TIMES - 1:
+                print(f"⚠️ 删除失败 {os.path.basename(src_path)}，重试中... ({retry+1}/{RETRY_TIMES})")
+                time.sleep(RETRY_DELAY)
+            else:
+                print(f"❌ 删除失败 {os.path.basename(src_path)}: {e}")
+                with open(os.path.join(TARGET_FOLDER, "delete_failed.txt"), "a", encoding="utf-8") as f:  # record the path that could not be deleted
+                    f.write(f"{src_path}\n")
+                return False
+
+
+# -------------------------- 主函数 --------------------------
+def main():
+    """Copy each new, large-enough image under SOURCE_FOLDER into TARGET_FOLDER (named by perceptual hash), delete the source, and print a summary."""
+    os.makedirs(TARGET_FOLDER, exist_ok=True)
+    
+    # Counters for the final report
+    total_files = 0
+    duplicate_files = 0
+    filtered_files = 0
+    moved_files = 0
+    error_files = 0
+    
+    # Perceptual hashes of the images already copied during this run
+    processed_hashes = set()
+    
+    print("=" * 60)
+    print("📁 图片去重工具")
+    print(f"🔍 源文件夹: {SOURCE_FOLDER}")
+    print(f"📂 目标文件夹: {TARGET_FOLDER}")
+    print(f"🎯 支持格式: {SUPPORTED_FORMATS}")
+    print("=" * 60)
+    print()
+    
+    # Walk the source tree recursively
+    for root, _, files in os.walk(SOURCE_FOLDER):
+        print(f"▶ 正在处理文件夹: {root}")
+        
+        for filename in files:
+            # Skip files whose extension is not a supported image format
+            if not filename.lower().endswith(SUPPORTED_FORMATS):
+                continue
+            
+            total_files += 1
+            file_path = os.path.join(root, filename)
+            
+            try:
+                # A failed read yields (None, None), so the size is unpacked only after the None check
+                img_hash, size = get_image_info(file_path)
+                
+                if img_hash is None:
+                    error_files += 1
+                    continue
+                width, height = size
+                
+                # Skip images with a side shorter than MIN_PIXEL_COUNT
+                if width < MIN_PIXEL_COUNT or height < MIN_PIXEL_COUNT:
+                    filtered_files += 1
+                    print(f"🔍 过滤小尺寸图片: {filename} ({width}x{height})")
+                    continue
+                
+                # An identical hash means this image was already copied
+                if img_hash in processed_hashes:
+                    duplicate_files += 1
+                    print(f"🔶 发现重复图片: {filename}")
+                    # Duplicates are left in place; enable the call below to delete them instead
+                    # delete_source_file(file_path)
+                    continue
+                
+                # Build the target file name
+                ext = os.path.splitext(filename)[1].lower()
+                # Hash plus timestamp, to avoid name collisions
+                target_filename = f"{img_hash}_{int(time.time())}{ext}"
+                target_path = os.path.join(TARGET_FOLDER, target_filename)
+                
+                # Copy into the target folder
+                if safe_copy_file(file_path, target_path):
+                    # Remove the source; a failed delete is logged by the helper and does not undo the copy
+                    delete_source_file(file_path)
+                    
+                    moved_files += 1
+                    processed_hashes.add(img_hash)
+                    print(f"✅ 已处理: {filename} → {target_filename}")
+                else:
+                    error_files += 1
+                    
+            except Exception as e:
+                error_files += 1
+                print(f"❌ 处理失败: {filename} - {e}")
+                continue
+    
+    # Summary report
+    print()
+    print("=" * 60)
+    print("📊 去重完成统计报告")
+    print("=" * 60)
+    print(f"📁 总文件数: {total_files}")
+    print(f"✅ 成功去重并移动: {moved_files}")
+    print(f"🔶 重复文件数: {duplicate_files}")
+    print(f"🔍 过滤小尺寸文件: {filtered_files}")
+    print(f"❌ 处理失败文件: {error_files}")
+    print(f"💾 已处理哈希值数量: {len(processed_hashes)}")
+    print()
+    
+    # Report source files that could not be deleted (the log file is appended to, so it may include earlier runs)
+    delete_failed_path = os.path.join(TARGET_FOLDER, "delete_failed.txt")
+    if os.path.exists(delete_failed_path):
+        with open(delete_failed_path, "r", encoding="utf-8") as f:
+            failed_count = len(f.readlines())
+        print(f"⚠️ 有 {failed_count} 个文件删除失败，详见: {delete_failed_path}")
+    
+    print("=" * 60)
+
+
+if __name__ == "__main__":  # run the de-duplication only when executed as a script
+    main()
\ No newline at end of file
diff --git a/face_detect.py b/face_detect.py
new file mode 100644
index 0000000..0e20005
--- /dev/null
+++ b/face_detect.py
@@ -0,0 +1,151 @@
+import os
+import shutil
+import platform
+import torch
+from ultralytics import YOLO
+
+# ======================== Core configuration (adjust the paths for your machine!) ========================
+SOURCE_IMG_DIR = r"D:\img_todo\all_img_result"  # Source folder holding the de-duplicated images
+HAVE_FACE_DIR = r"D:\img_todo\good"            # Output folder for images in which a face was detected
+NO_FACE_DIR = r"D:\img_todo\bad"              # Output folder for images without a qualifying face
+CONF_THRESHOLD = 0.9                           # Confidence threshold; only detections at or above it count as a face
+MODEL_PATH = 'face.pt'          # Path to the YOLOv8 face-detection model weights
+# ========================================================================================
+
+
+def detect_device():
+    """
+    Pick the compute device for inference, printing what was detected along the way.
+    Returns "mps" or "cpu" on macOS; on other systems 0 when CUDA is available, else "cpu".
+    """
+    print("🔍 正在检测可用计算设备...")
+    
+    # Operating system name as reported by platform.system()
+    system = platform.system()
+    print(f"💻 操作系统: {system}")
+    
+    # macOS is handled first: only MPS is probed, and both outcomes return here
+    if system == "Darwin":  # macOS
+        if torch.backends.mps.is_available():
+            print("✅ 检测到Mac MPS GPU，将使用MPS加速")
+            return "mps"
+        else:
+            print("⚠️  Mac MPS不可用，将使用CPU")
+            return "cpu"
+    
+    # CUDA GPU check (Windows/Linux)
+    if torch.cuda.is_available():
+        gpu_count = torch.cuda.device_count()
+        gpu_name = torch.cuda.get_device_name(0)
+        print(f"✅ 检测到CUDA GPU ({gpu_count}个): {gpu_name}")
+        print(f"   CUDA版本: {torch.version.cuda}")
+        return 0  # index of the first CUDA device (an int, unlike the string returns)
+    
+    # No GPU is available: fall back to the CPU
+    print("⚠️  未检测到可用GPU，将使用CPU")
+    return "cpu"
+
+
+def main():
+    """
+    Run face detection on every supported image directly inside SOURCE_IMG_DIR and print a summary.
+    Images with a detection at or above CONF_THRESHOLD are moved to HAVE_FACE_DIR, the others to NO_FACE_DIR.
+    """
+    # Create the output folders if they do not exist yet
+    os.makedirs(HAVE_FACE_DIR, exist_ok=True)
+    os.makedirs(NO_FACE_DIR, exist_ok=True)
+
+    # Choose the device used for inference
+    device = detect_device()
+    
+    # Load the face-detection model; abort when it cannot be loaded
+    print(f"\n🚀 正在加载人脸检测模型: {MODEL_PATH}")
+    try:
+        model = YOLO(MODEL_PATH)
+        print(f"✅ 模型加载成功: {model.model.__class__.__name__}")
+        print(f"🔧 当前使用设备: {device}")
+    except Exception as e:
+        print(f"❌ 模型加载失败: {e}")
+        return
+
+    # File extensions that are treated as images
+    SUPPORT_IMG_FORMATS = ['.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tif', '.tiff']
+
+    img_count = 0  # images attempted; the counters feed the final report
+    have_face_count = 0
+    no_face_count = 0  # counted separately so images that failed are not reported as "no face"
+    
+    print(f"\n📁 开始处理图片文件夹: {SOURCE_IMG_DIR}")
+    print(f"🎯 人脸检测置信度阈值: {CONF_THRESHOLD * 100:.0f}%")
+    print(f"📂 有人脸图片输出目录: {HAVE_FACE_DIR}")
+    print(f"📂 无有效人脸图片输出目录: {NO_FACE_DIR}")
+    print("=" * 60)
+
+    try:
+        for file_name in os.listdir(SOURCE_IMG_DIR):
+            # Full path and lower-cased extension of the entry
+            file_path = os.path.join(SOURCE_IMG_DIR, file_name)
+            file_suffix = os.path.splitext(file_name)[1].lower()
+
+            # Only image files are processed
+            if file_suffix not in SUPPORT_IMG_FORMATS:
+                continue
+
+            img_count += 1
+            print(f"\n📷 正在检测第{img_count}张图片: {file_name}")
+
+            try:
+                # Inference; conf= makes the model return only detections at or above CONF_THRESHOLD
+                results = model(file_path, conf=CONF_THRESHOLD, device=device, verbose=False)
+
+                # Collect "label - confidence" text for every returned box
+                det_boxes = results[0].boxes  # detected boxes for this image
+                detect_info = []
+                for box in det_boxes:
+                    cls_name = model.names[int(box.cls)]  # label name of the detection
+                    conf_score = round(float(box.conf), 4)  # confidence rounded to 4 decimals
+                    detect_info.append(f"{cls_name} - {conf_score * 100:.2f}%")
+
+                # Print all labels and confidences found for this image
+                if detect_info:
+                    print(f"🔍 检测到的标签及置信度: {detect_info}")
+                else:
+                    print(f"🔍 检测到的标签及置信度: 无符合条件的检测结果")
+
+                # Any surviving box means a face at or above the configured threshold
+                if len(det_boxes) > 0:
+                    # Face found: move the image to the have-face folder
+                    dest_path = os.path.join(HAVE_FACE_DIR, file_name)
+                    shutil.move(file_path, dest_path)
+                    print(f"✅ 判定结果：置信度≥{CONF_THRESHOLD * 100:.0f}%，检测到人脸 → 已移动至 {os.path.basename(HAVE_FACE_DIR)}")
+                    have_face_count += 1
+                else:  # no detection at or above the threshold: move to the no-face folder
+                    dest_path = os.path.join(NO_FACE_DIR, file_name)
+                    shutil.move(file_path, dest_path)
+                    print(f"🔶 判定结果：无有效人脸（置信度<{CONF_THRESHOLD * 100:.0f}%）→ 已移动至 {os.path.basename(NO_FACE_DIR)}")
+                    no_face_count += 1
+
+            except Exception as e:
+                # A failure on one image must not stop the batch; the file stays where it is
+                print(f"⚠️  图片 {file_name} 处理失败: {str(e)} → 跳过该图片")
+                continue
+
+    except KeyboardInterrupt:
+        print("\n\n⚠️  用户中断了程序执行")
+    except Exception as e:
+        print(f"\n\n❌ 程序执行出错: {e}")
+    finally:
+        # Always print the statistics, even after an interruption or error
+        print("\n" + "=" * 60)
+        print("📊 人脸检测完成统计报告")
+        print("=" * 60)
+        print(f"📁 总共检测图片数量: {img_count} 张")
+        print(f"✅ 检测出有人脸(置信度≥{CONF_THRESHOLD * 100:.0f}%)的图片数量: {have_face_count} 张")
+        print(f"🔶 无有效人脸的图片数量: {no_face_count} 张")
+        print(f"📂 有人脸图片保存路径: {HAVE_FACE_DIR}")
+        print(f"📂 无有效人脸图片保存路径: {NO_FACE_DIR}")
+        print("=" * 60)
+
+
+if __name__ == "__main__":  # run the face-detection batch only when executed as a script
+    main()
\ No newline at end of file