思路

先通过百度通用OCR识别图片中的文字信息，再遍历这些文字，看是否包含相关的关键字。如果百度识别不出来，就用本地的tesseract再识别一次；仍然识别不出的图片统一放到一个目录中，人工处理。

进行分类前：

百度在线识图网页入口官网，百度识图在线使用网页版下载（百度识图网页版在线）难以置信微商销售话术定制家具销售话术美容销售话术美容养生销售话术抗日电视剧排行榜十大韩国电影排行榜建材电话销售话术百科资讯第1张

百度在线识图网页入口官网，百度识图在线使用网页版下载（百度识图网页版在线）难以置信微商销售话术定制家具销售话术美容销售话术美容养生销售话术抗日电视剧排行榜十大韩国电影排行榜建材电话销售话术百科资讯第2张

进行分类后：

百度在线识图网页入口官网，百度识图在线使用网页版下载（百度识图网页版在线）难以置信微商销售话术定制家具销售话术美容销售话术美容养生销售话术抗日电视剧排行榜十大韩国电影排行榜建材电话销售话术百科资讯第3张

百度在线识图网页入口官网，百度识图在线使用网页版下载（百度识图网页版在线）难以置信微商销售话术定制家具销售话术美容销售话术美容养生销售话术抗日电视剧排行榜十大韩国电影排行榜建材电话销售话术百科资讯第4张

代码实现

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022-7-8 9:09 PM
# @Author : damon.guo
# @File : picToClass.py
# @Software: PyCharm
"""Sort images into folders named after the keyword text found in them.

Each image is first sent to Baidu OCR.  When no keyword line is found in the
response, a local Tesseract pass is tried on an edge-detected copy of the
image.  Images that neither pass can label are filed under ``other``.
"""
import os
import shutil
import sys

# Keywords that mark a line of OCR text as the label of an image.
gettaglist = ["诊断书", "调查报告", "疗养院", "卫生所", "服务中心"]


def picToClass(picturePath, tmpfilename=r"F:\xbl\11\\class\\tmp.jpg"):
    """Label an image with local Tesseract OCR (the fallback path).

    The image is converted to grayscale, blurred and edge-detected; the edge
    map is written to *tmpfilename* and read back for OCR with the
    ``chi_sim`` language model.

    Args:
        picturePath: Path of the image to recognise.
        tmpfilename: Where the intermediate edge image is written.  Defaults
            to the location the script has always used.

    Returns:
        The beginning of the first OCR line that contains a keyword, cut at
        the offset reported by ``getTagIndex`` and stripped of surrounding
        spaces, then colons, then percent signs.  ``"other"`` when the image
        cannot be read or no line contains a keyword.
    """
    # Imported here rather than at module level so that the pure text helpers
    # stay usable when the local OCR stack (OpenCV, Pillow, Tesseract) is not
    # installed; it is only needed when the Baidu pass finds nothing.
    import cv2
    import pytesseract
    from PIL import Image

    image = cv2.imread(picturePath)
    if image is None:
        # cv2.imread reports an unreadable or non-image file by returning
        # None instead of raising; cvtColor would then fail.
        return "other"

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)
    cv2.imwrite(tmpfilename, edged)

    # Close the temp image as soon as OCR is done so the next call can
    # overwrite the same file.
    with Image.open(tmpfilename) as edge_image:
        text = pytesseract.image_to_string(edge_image, lang="chi_sim")
    print("二次辨识", picturePath)

    for line in text.split("\n"):
        index = getTagIndex(line)
        if index != -1:
            news = line[:index]
            print("nes", news)
            return news.strip(" ").strip(":").strip("%")
    # Still not recognised: everything unlabelled shares one folder.
    return "other"


def getclient(APP_ID, API_KEY, SECRET_KEY):
    """Create a Baidu OCR client.

    Args:
        APP_ID: Your Baidu App ID.
        API_KEY: Your Baidu Api Key.
        SECRET_KEY: Your Baidu Secret Key.

    Returns:
        An ``AipOcr`` instance built from the three credentials.
    """
    # Imported lazily for the same reason as the OCR stack in picToClass.
    from aip import AipOcr

    return AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(imagepath):
    """Return the raw bytes of the file at *imagepath*."""
    with open(imagepath, "rb") as fp:
        return fp.read()


def ocrsdk(imagepath):
    """Label an image, trying Baidu OCR first and local OCR second.

    Args:
        imagepath: Path of the image to recognise.

    Returns:
        The label found by ``baiduocrreslut``; when that is ``"other"``, the
        result of ``picToClass`` instead.
    """
    # Placeholder credentials: replace with a real App ID / Api Key / Secret Key.
    client = getclient("xx", "xx", "xx")
    image = get_file_content(imagepath)
    # No optional request parameters are set.  The original script listed
    # "templateSign", "detect_direction", "probability" and "language_type"
    # as candidates but left all of them disabled.
    options = {}
    res_image = client.basicAccurate(image, options)
    # Log the path; printing the image bytes themselves floods the console.
    print("辨识相片：", imagepath)
    tag = baiduocrreslut(res_image)
    if tag == "other":
        # Baidu found no keyword line: run the local OCR pass once.
        tag = picToClass(imagepath)
    return tag


def getTagIndex(strs):
    """Locate a keyword in *strs* and return the cut-off used for the label.

    Keywords are tried in ``gettaglist`` order, so the result belongs to the
    first keyword in that list that occurs in *strs*, not necessarily the one
    appearing earliest in the string.

    Args:
        strs: One line of OCR text.

    Returns:
        The index of the matching keyword plus 2, or -1 when no keyword
        occurs in *strs*.
    """
    # NOTE(review): the offset is a fixed 2 while the keywords in gettaglist
    # are 3-4 characters long, so a prefix cut at this index ends inside the
    # keyword.  Confirm whether len(keyword) was intended.
    for keyword in gettaglist:
        if keyword in strs:
            return strs.index(keyword) + 2
    return -1


def copyFile(src, dst):
    """Copy the file *src* to *dst* (thin wrapper around ``shutil.copy``)."""
    shutil.copy(src, dst)


def baiduocrreslut(res):
    """Extract the label from a Baidu OCR response.

    Args:
        res: Response dict from the Baidu client.  Recognised lines are read
            from ``res["words_result"]``, each a dict with a ``"words"`` key.

    Returns:
        The text of the first line that contains a keyword and is at least
        four characters long, or ``"other"`` when there is no such line.
    """
    # A response without "words_result" (presumably what the service returns
    # on an API error -- confirm against the Baidu docs) is treated as
    # "nothing recognised" so the caller falls back to local OCR.
    wordlist = res.get("words_result") or []
    for entry in wordlist:
        words = entry["words"]
        for keyword in gettaglist:
            if keyword in words:
                print(words)
                if len(words) >= 4:
                    return words
    return "other"


def main(path=r"F:\xbl\preclass", classpath=r"F:\xbl\class"):
    """Classify every image in *path* into a sub-folder of *classpath*.

    Args:
        path: Folder holding the images waiting to be classified.
        classpath: Root folder that receives one sub-folder per label.
    """
    for name in os.listdir(path):
        imagepath = os.path.join(path, name)
        if not os.path.isfile(imagepath):
            # Sub-directories cannot be read as images; leave them alone.
            continue
        tag = ocrsdk(imagepath)
        tagpath = os.path.join(classpath, tag)
        os.makedirs(tagpath, exist_ok=True)
        newimagepath = os.path.join(tagpath, name)
        # Never overwrite a file that was already classified.
        if not os.path.exists(newimagepath):
            copyFile(imagepath, newimagepath)


if __name__ == "__main__":
    main()

发表评论：取消回复

原文链接：https://zazhiba.com.cn/post/20380.html

=========================================

https://zazhiba.com.cn/ 为 “自由随风” 唯一官方服务平台，请勿相信其他任何渠道。