基于SDPose-Wholebody的Python爬虫数据增强自动标注视频姿态数据1. 引言你有没有遇到过这样的困扰想要训练一个姿态识别模型却苦于没有足够的标注数据手动标注视频中的每一帧不仅耗时耗力而且容易出错。今天我要分享的这套方案正好能解决这个问题。通过Python爬虫获取网络视频资源再用SDPose-Wholebody这个强大的133关键点姿态估计模型进行自动标注你就能快速构建高质量的训练数据集。整个过程完全自动化不需要人工干预大大提升了数据准备的效率。本文将手把手带你完成整个流程从视频采集到标注生成让你轻松获得成千上万的标注样本。2. 环境准备与工具安装首先我们需要搭建工作环境。这个方案主要依赖几个核心工具# 创建虚拟环境 conda create -n pose_annotation python3.10 conda activate pose_annotation # 安装基础依赖 pip install opencv-python numpy requests beautifulsoup4 pip install torch torchvision对于视频爬虫部分我们使用requests和BeautifulSoup来抓取和解析网页内容import requests from bs4 import BeautifulSoup import cv2 import os # 创建数据目录 os.makedirs(videos, exist_okTrue) os.makedirs(frames, exist_okTrue) os.makedirs(annotations, exist_okTrue)接下来安装SDPose-Wholebody相关的依赖# 克隆SDPose仓库 git clone https://github.com/t-s-liang/SDPose-OOD.git cd SDPose-OOD # 安装MMPose和相关依赖 pip install -r requirements.txt # 下载预训练模型 wget https://huggingface.co/teemosliang/SDPose-Wholebody/resolve/main/sdpose_wholebody.pth -P models/3. 视频爬虫与帧提取优化3.1 智能视频爬虫设计写爬虫时最头疼的就是遇到反爬机制。这里分享几个实用的技巧def download_video(url, save_path): headers { User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36, Referer: https://www.example.com/, Accept: video/webm,video/ogg,video/*;q0.9,application/octet-stream;q0.8, } try: response requests.get(url, headersheaders, streamTrue, timeout30) if response.status_code 200: with open(save_path, wb) as f: for chunk in response.iter_content(chunk_size8192): f.write(chunk) return True except Exception as e: print(f下载失败: {e}) return False3.2 高效帧提取策略直接处理整个视频很耗资源我们需要智能抽帧def extract_key_frames(video_path, output_dir, interval10): cap cv2.VideoCapture(video_path) frame_count 0 saved_count 0 while True: ret, frame cap.read() if not ret: break if frame_count % interval 0: # 只保存有关键人物的帧 if has_human_subject(frame): frame_path os.path.join(output_dir, fframe_{saved_count:06d}.jpg) cv2.imwrite(frame_path, frame) saved_count 1 frame_count 1 cap.release() return saved_count def has_human_subject(frame): # 简单的基于运动或颜色的检测 gray cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) edges cv2.Canny(gray, 100, 200) return np.sum(edges) 10000 # 阈值可根据实际情况调整4. 批量姿态估计与加速技巧4.1 SDPose-Wholebody快速部署SDPose-Wholebody支持133个关键点包括身体、手部、面部和脚部比传统的17点模型详细得多from mmpose.apis import inference_topdown, init_model from mmpose.structures import PoseDataSample # 初始化模型 model init_model( configconfigs/sdpose_wholebody.py, checkpointmodels/sdpose_wholebody.pth, devicecuda:0 # 使用GPU加速 ) def batch_pose_estimation(image_paths): results [] for img_path in image_paths: # 批量推理 result inference_topdown(model, img_path) results.append(result) return results4.2 推理加速技巧处理大量帧时这些优化技巧能显著提升速度# 使用批量处理 def process_batch(frames_batch): # 统一调整尺寸减少计算量 resized_batch [cv2.resize(frame, (1024, 768)) for frame in frames_batch] # 批量推理 with torch.no_grad(): results model(resized_batch) return results # 使用多进程 from multiprocessing import Pool def parallel_processing(frame_paths, num_workers4): with Pool(num_workers) as p: results p.map(process_single_frame, frame_paths) return results5. COCO格式标注文件生成5.1 标注数据结构设计COCO格式是业界标准方便后续模型训练def create_coco_template(): coco_template { info: { description: Auto-generated pose dataset, version: 1.0, year: 2024, contributor: SDPose-Wholebody Auto Annotation, date_created: 2024-01-01 }, licenses: [{id: 1, name: Academic Use}], images: [], annotations: [], categories: [{ id: 1, name: person, keypoints: [...] # 133个关键点名称 }] } return coco_template5.2 自动标注生成将SDPose的输出转换为COCO格式def convert_to_coco_format(pose_results, image_info): annotations [] image_id image_info[id] for i, result in enumerate(pose_results): keypoints result.pred_instances.keypoints[0] # 获取第一个人的关键点 scores result.pred_instances.keypoint_scores[0] annotation { id: len(annotations) 1, image_id: image_id, category_id: 1, keypoints: keypoints.flatten().tolist(), num_keypoints: int(np.sum(scores 0.3)), # 只统计置信度高的点 area: calculate_bbox_area(keypoints), bbox: get_bbox_from_keypoints(keypoints), iscrowd: 0 } annotations.append(annotation) return annotations def get_bbox_from_keypoints(keypoints): # 从关键点计算边界框 x_coords keypoints[:, 0] y_coords keypoints[:, 1] x_min, x_max np.min(x_coords), np.max(x_coords) y_min, y_max np.min(y_coords), np.max(y_coords) width x_max - x_min height y_max - y_min # 添加一些边距 margin 20 return [x_min - margin, y_min - margin, width 2*margin, height 2*margin]6. 完整流程整合与实战演示6.1 端到端自动化流水线现在我们把所有步骤整合起来def auto_annotation_pipeline(video_urls, output_dir): # 1. 下载视频 video_paths [] for url in video_urls: path download_video(url, os.path.join(output_dir, videos)) if path: video_paths.append(path) # 2. 提取关键帧 all_frames [] for video_path in video_paths: frames extract_key_frames(video_path, os.path.join(output_dir, frames)) all_frames.extend(frames) # 3. 批量姿态估计 results batch_pose_estimation(all_frames) # 4. 生成COCO标注 coco_data create_coco_template() for i, (frame_path, result) in enumerate(zip(all_frames, results)): # 添加图像信息 image_info { id: i, file_name: os.path.basename(frame_path), width: 1024, height: 768 } coco_data[images].append(image_info) # 添加标注信息 annotations convert_to_coco_format(result, image_info) coco_data[annotations].extend(annotations) # 5. 保存标注文件 with open(os.path.join(output_dir, annotations/instances_train.json), w) as f: json.dump(coco_data, f, indent2) print(f标注完成共处理 {len(all_frames)} 帧图像)6.2 实际效果验证让我们看看生成的数据质量def validate_annotations(annotation_path, frames_dir): with open(annotation_path, r) as f: coco_data json.load(f) # 随机抽样检查 sample_indices random.sample(range(len(coco_data[images])), min(5, len(coco_data[images]))) for idx in sample_indices: img_info coco_data[images][idx] img_path os.path.join(frames_dir, img_info[file_name]) # 可视化检查 img cv2.imread(img_path) annotations [ann for ann in coco_data[annotations] if ann[image_id] img_info[id]] for ann in annotations: keypoints np.array(ann[keypoints]).reshape(-1, 3) for x, y, score in keypoints: if score 0.3: # 只绘制高置信度点 cv2.circle(img, (int(x), int(y)), 3, (0, 255, 0), -1) cv2.imshow(Validation, img) cv2.waitKey(1000) cv2.destroyAllWindows()7. 总结这套基于SDPose-Wholebody的自动标注方案确实能大大提升数据准备的效率。实际使用下来处理一个小时的视频大概能生成几千个高质量的标注样本而且关键点标注的准确度相当不错。需要注意的是虽然自动化程度很高但还是建议对生成的数据进行抽样检查特别是对于特殊姿势或遮挡较多的情况。有时候模型可能会漏检或者误检这时候可以适当调整置信度阈值或者增加后处理步骤。如果你想要更好的效果可以考虑用少量人工标注的数据对SDPose进行微调这样在特定场景下的表现会更好。另外对于不同的应用场景可能需要对关键点的选择进行调整比如健身动作识别可能更关注关节角度而舞蹈动作可能更关注整体姿态。获取更多AI镜像想探索更多AI镜像和应用场景访问 CSDN星图镜像广场提供丰富的预置镜像覆盖大模型推理、图像生成、视频生成、模型微调等多个领域支持一键部署。