筛选京东晒单中包含人脸的图片

前提准备

安装好python3,执行下面的安装命令

pip3 install cmake
pip3 install face_recognition

第一步

用爬虫把商品的晒单图片下载下来,存到img文件夹

import requests
import re
import time
import os

def getImageCommentListUrl(productId, page):
    """Build the club.jd.com image-comment list URL for one product page.

    Args:
        productId: Product identifier inserted as the ``productId`` query value.
        page: Page index inserted as the ``page`` query value.

    Returns:
        The full request URL as a string; ``isShadowSku`` is fixed to 0 and
        ``pageSize`` is fixed to 20.
    """
    endpoint = "https://club.jd.com/discussion/getProductPageImageCommentList.action"
    query = "productId={}&isShadowSku=0&page={}&pageSize=20".format(productId, page)
    return endpoint + "?" + query
# Product IDs whose review images are downloaded. Commented-out entries are
# disabled and are not processed.
productIdList = [
    # T-shirts
    # 27445271201,
    # 27538901956,
    # 10007744648,
    # 25938518228,
    # 1817070267
    # 1353562846,
    # 11341812467
    10424222952,
    14859030226,
    1694242679
]
# Size variant that replaces the '/jfs/' segment of every image URL; it is
# also appended to the product ID to form the per-product folder name.
imgConf = 's600x600_jfs'
for productId in [str(x) for x in productIdList]:
    print(productId)
    imgPath = productId + imgConf
    imgDir = 'img//' + imgPath
    # Test the directory that is actually created (under img/). Checking the
    # bare folder name never matched, so a rerun crashed in os.makedirs
    # instead of skipping products that already have a folder.
    if os.path.exists(imgDir):
        continue
    os.makedirs(imgDir)
    # Walk the result pages until the service returns an empty image list.
    for page in range(1, 1000):
        print(page)
        # Pause between page requests.
        time.sleep(0.5)
        url = getImageCommentListUrl(productId, page)
        jsonData = requests.get(url).json()
        imgList = jsonData['imgComments']['imgList']
        if not imgList:
            break
        for item in imgList:
            # The API returns a scheme-less URL; request the resized variant.
            imageUrl = 'http:' + item['imageUrl'].replace('/jfs/', '/' + imgConf + '/')
            imageFile = 'img//{}//{}.jpg'.format(imgPath, item['imageId'])
            print(imageFile)
            # Download before opening the file so a failed request does not
            # leave an empty .jpg behind.
            content = requests.get(imageUrl).content
            with open(imageFile, 'wb') as f:
                f.write(content)

第二步

建立一个done文件夹并运行人脸识别程序

import face_recognition
import os
import shutil
# Base directory that contains the 'img' folder produced by the crawler and
# receives the 'done' folder with the filtered results.
path = 'D://python//jd'
# Example: detect face locations in a single image
# image = face_recognition.load_image_file(path + "//img//27445271201//864354805.jpg")
# image = face_recognition.load_image_file(path + "//2a40b42b35ee6f6f.jpg")
# face_locations = face_recognition.face_locations(image)
# Example: detect facial landmarks (facial features)
# image = face_recognition.load_image_file("your_file.jpg")
# face_landmarks_list = face_recognition.face_landmarks(image)
# print(face_locations)

# shutil.move() only moves a folder *into* the destination when the
# destination already exists as a directory; otherwise it renames the source
# to the destination. Create 'done' up front so every product folder ends up
# inside it.
donePath = path + '//done'
os.makedirs(donePath, exist_ok=True)

# Snapshot the directory listings with list() because entries are deleted
# and moved while the loops run.
for pathItem in list(os.scandir(path + '//img')):
    # Only product folders are processed; stray files directly under img/
    # would make the inner scandir fail.
    if not pathItem.is_dir():
        continue
    for fileItem in list(os.scandir(pathItem)):
        if not fileItem.is_file():
            continue
        # e.g. path + "//img//27445271201//864354805.jpg"
        imgPath = "{}//img//{}//{}".format(path, pathItem.name, fileItem.name)
        print(imgPath)
        try:
            image = face_recognition.load_image_file(imgPath)
        except Exception:
            # Files that cannot be loaded as images are discarded. Catching
            # Exception (not a bare except) lets Ctrl-C stop the run instead
            # of deleting the current file and continuing.
            os.remove(imgPath)
            continue
        face_locations = face_recognition.face_locations(image)
        if not face_locations:
            # Delete images in which no face was detected.
            os.remove(imgPath)
    print(pathItem.name)
    print(donePath)
    # Move the filtered product folder out of img/ into done/.
    shutil.move(path + '//img//' + pathItem.name, donePath)

识别结果

BaiduHi_2019-11-20_20-42-15.png

最后修改:2019 年 11 月 21 日 10 : 22 AM

2 条评论

  1. moozik

    可以看到识别还是有很多错误,这个库主要是识别欧洲成年人,亚洲人脸型和小孩子识别得不好

    1. Angelic47
      @moozik

      似乎是因为这个库训练模型的时候样本没有覆盖到亚洲部分

发表评论