You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

240 lines
7.1 KiB
Go

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package cmd
import (
"encoding/base64"
"fmt"
"github.com/go-resty/resty/v2"
"github.com/mizuki1412/go-core-kit/class/exception"
"github.com/mizuki1412/go-core-kit/init/initkit"
"github.com/mizuki1412/go-core-kit/service/logkit"
"github.com/spf13/cobra"
"github.com/tidwall/gjson"
"github.com/xuri/excelize/v2"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
// init registers auSimCmd as a sub-command of rootCmd.
func init() {
rootCmd.AddCommand(auSimCmd)
}
// Settings and lookup tables used by the "au" command.
var (
// imageDir is the directory scanned for input images; processed images are
// moved into sub-directories beneath it.
imageDir = "/Users/leo/Documents/au-ocr/image/"
// ocrAPIURL is the endpoint callOCR posts base64-encoded images to.
ocrAPIURL = "http://172.16.5.160:8000/ocr/predict-by-base64/"
// outputExcelPath is the directory the timestamped .xlsx result is written to.
outputExcelPath = "/Users/leo/Documents/au-ocr/excel/"
// nameWhiteList holds the salesperson names searched for in the OCR text.
// extractInfoFromOCR stops at the first entry that matches, so order matters.
nameWhiteList = []string{"德宁", "鼎斌", "海波", "何晓璇", "胡鑫", "佳欣", "佳怡", "嘉乐", "建闽", "锦城", "景浩", "君豪", "凯彬", "兰青", "李想", "林艳", "刘美云", "裴雅妮", "任子健", "润宇", "隆蝶", "石明毅", "覃彩玉", "唐鑫", "唐宇豪", "童斌", "万兴凯", "王权", "吴宇峰", "武文迪", "夏晨阳", "项乐奇", "小锐", "小颖", "晓雪", "谢俊", "欣萍", "鑫杰", "徐宁", "许慧超", "雅妮", "杨传杰", "杨帅", "杨笑笑", "杨兴俊", "叶琪婷", "宇飞", "玉梅", "张钧帅", "张奕韬", "张玉", "张原硕", "章帅", "赵林冲", "郑佳欣", "朱菲玲", "子健", "邹思惠", "徐林焱", "周志乐", "林焱"}
// schoolWhiteList holds the school keywords looked up in the text around a
// matched name; the first entry that matches wins.
schoolWhiteList = []string{"财经", "工商", "工业", "杭电", "计量", "金融", "经济", "经贸", "科技", "理工", "美院", "万向", "长征", "浙音", "万象", "特殊教育", "外国语"}
// amountWhiteList holds the first top-up values looked up in the same
// surrounding text; the first entry that matches wins.
amountWhiteList = []string{"200", "100", "未充值"}
)
// ExtractedInfo is the data pulled out of one OCR'd image. Fields that could
// not be recognised are left as the empty string.
type ExtractedInfo struct {
// Name is the salesperson name matched from nameWhiteList.
Name string
// School is the school keyword matched from schoolWhiteList.
School string
// Phone is the 11-digit number captured from the OCR text.
Phone string
// Amount is the first top-up value matched from amountWhiteList.
Amount string
}
// auSimCmd is the "au" sub-command. It runs OCR on every eligible .jpg in
// imageDir, extracts the salesperson, school, first top-up and phone number,
// files each image into a fail/, broadband/ or success/ sub-directory of
// imageDir according to the outcome, and finally writes all successful
// extractions to a timestamped .xlsx file under outputExcelPath.
var auSimCmd = &cobra.Command{
	Use:   "au",
	Short: "Batch processing operations of the autumn semester sim card",
	Run: func(cmd *cobra.Command, args []string) {
		initkit.BindFlags(cmd)
		// ioutil.ReadDir is kept (rather than os.ReadDir) so the file's existing
		// io/ioutil import stays in use.
		files, err := ioutil.ReadDir(imageDir)
		if err != nil {
			panic(exception.New(err.Error()))
		}

		// describe renders the common part of every per-image log line.
		describe := func(info ExtractedInfo) string {
			return "业务员:" + info.Name + " 学校:" + info.School + " 首充:" + info.Amount + " 号码:" + info.Phone
		}

		var results []ExtractedInfo
		for _, file := range files {
			name := file.Name()
			// Only plain .jpg files are processed; names containing "thumb.jpg"
			// or "hd.jpg" are skipped (presumably derived copies — confirm).
			if file.IsDir() || !strings.HasSuffix(name, ".jpg") || strings.Contains(name, "thumb.jpg") || strings.Contains(name, "hd.jpg") {
				continue
			}
			fullPath := filepath.Join(imageDir, name)
			base64Str, err := imageToBase64(fullPath)
			if err != nil {
				panic(exception.New("转base64失败:" + err.Error()))
			}
			// callOCR reports its own failures by panicking, so there is no
			// error value to check here.
			ocrText := callOCR(base64Str)
			info := extractInfoFromOCR(ocrText)

			switch {
			case info.Phone == "":
				// No phone number recognised: park the image for manual handling.
				logkit.Info("××× " + describe(info) + " 识别失败!")
				moveAuImage(fullPath, filepath.Join(imageDir, "fail", name+".error.jpg"))

			case strings.Contains(info.Phone, "200") || strings.Contains(info.Phone, "100") || strings.Contains(info.Phone, "50"):
				// The phone number itself contains digits that look like a
				// top-up amount, so the extracted amount may be a misread.
				moveAuImage(fullPath, filepath.Join(imageDir, "fail", name+".phone.jpg"))
				logkit.Info(describe(info) + " 号码里包含充值敏感数字,可能会误识别!")

			default:
				// A recognised phone number is enough to count as a success;
				// missing fields are filled with "?" when the sheet is written.
				target := ""
				if containsBroadband(ocrText) {
					// Broadband orders are collected in broadband/ under their
					// original file name and tagged in the exported name column.
					target = filepath.Join(imageDir, "broadband", name)
					info.Name = info.Name + "+宽带"
					logkit.Info(info.Phone + " √√√加宽带!√√√")
				}
				logkit.Info(describe(info) + " 识别成功!")
				if target == "" {
					target = filepath.Join(imageDir, "success", fmt.Sprintf("%s%s.jpg", info.Name, info.Phone))
				}
				results = append(results, info)
				moveAuImage(fullPath, target)
			}
		}
		writeExcel(results, filepath.Join(outputExcelPath, time.Now().Format("20060102150405")+".xlsx"))
	},
}

// moveAuImage moves the image at src to dst, creating dst's parent directory
// first. Failures are logged and otherwise tolerated so that one unmovable
// file does not abort the rest of the batch.
func moveAuImage(src, dst string) {
	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
		logkit.Info("move image skipped: " + err.Error())
		return
	}
	if err := os.Rename(src, dst); err != nil {
		logkit.Info("move image failed: " + err.Error())
	}
}
// containsBroadband reports whether at least one of the OCR text fragments
// contains the broadband keyword "宽带". Fragments are checked individually,
// so a keyword split across two fragments does not count.
func containsBroadband(texts []string) bool {
	found := false
	for i := 0; i < len(texts) && !found; i++ {
		found = strings.Contains(texts[i], "宽带")
	}
	return found
}
// imageToBase64 reads the file at path and returns its contents encoded with
// standard (padded) base64. A read failure is returned unchanged together
// with an empty string.
func imageToBase64(path string) (string, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", readErr
	}
	encoded := base64.StdEncoding.EncodeToString(raw)
	return encoded, nil
}
// callOCR posts the base64-encoded image to ocrAPIURL as JSON
// ({"base64_str": ...}) and returns the recognised text fragments in the order
// the service lists them under "data.0".
//
// It panics with an exception when the request fails, when the response's
// "resultcode" is not 200, or when "data.0" is absent.
func callOCR(b64 string) []string {
	client := resty.New()
	resp, err := client.R().
		SetHeader("Content-Type", "application/json").
		SetBody(map[string]string{
			"base64_str": b64,
		}).
		Post(ocrAPIURL)
	if err != nil {
		panic(exception.New(err.Error()))
	}
	bodyStr := string(resp.Body())

	// A missing or non-numeric resultcode reads as 0 and is rejected here too.
	if gjson.Get(bodyStr, "resultcode").Int() != 200 {
		panic(exception.New(fmt.Sprintf("OCR failed: %s", gjson.Get(bodyStr, "message").String())))
	}

	textResults := gjson.Get(bodyStr, "data.0")
	if !textResults.Exists() {
		panic(exception.New("no OCR results found"))
	}

	var texts []string
	textResults.ForEach(func(_, line gjson.Result) bool {
		// Each usable entry is an array whose second element is itself an
		// array with the recognised text first. Entries that are too short
		// are skipped instead of indexing out of range.
		if !line.IsArray() {
			return true
		}
		fields := line.Array()
		if len(fields) < 2 || !fields[1].IsArray() {
			return true
		}
		recognised := fields[1].Array()
		if len(recognised) == 0 {
			return true
		}
		texts = append(texts, recognised[0].String())
		return true
	})
	return texts
}
// auPhonePattern captures the first run of 11 digits that follows one of the
// marker sequences "XH" plus one character outside \x00-\xff, "H势" or "势".
// NOTE(review): the markers look like OCR renderings of a label printed before
// the number — confirm against real OCR output.
// Compiled once at package scope instead of once per image.
var auPhonePattern = regexp.MustCompile(`(?:XH[^\x00-\xff]|H势|势).*?(\d{11})`)

// extractInfoFromOCR concatenates the OCR text fragments and extracts:
//   - Name: the first entry of nameWhiteList that occurs in the text;
//   - School / Amount: the first entries of schoolWhiteList / amountWhiteList
//     that occur within 30 bytes on either side of that name;
//   - Phone: the 11-digit number captured by auPhonePattern.
//
// Fields that cannot be determined are left empty. School and Amount are only
// looked for when a name was found.
func extractInfoFromOCR(texts []string) ExtractedInfo {
	combined := strings.Join(texts, "")
	info := ExtractedInfo{}

	// Salesperson name: the first whitelist hit wins.
	// NOTE(review): a whitelist entry that contains an earlier entry as a
	// substring can therefore never be selected.
	for _, name := range nameWhiteList {
		if !strings.Contains(combined, name) {
			continue
		}
		info.Name = name
		nearby := getContext(combined, name, 30)
		logkit.Info("【上下文 " + nearby + "】")

		// School and amount are searched only in the text around the name.
		for _, school := range schoolWhiteList {
			if strings.Contains(nearby, school) {
				info.School = school
				break
			}
		}
		for _, amount := range amountWhiteList {
			if strings.Contains(nearby, amount) {
				info.Amount = amount
				break
			}
		}
		break
	}

	// Phone number: match[0] is the whole match, match[1] the 11 digits.
	if match := auPhonePattern.FindStringSubmatch(combined); len(match) == 2 {
		info.Phone = match[1]
	}
	return info
}
// getContext returns the part of text around the first occurrence of keyword:
// up to length bytes before it, the keyword itself, and up to length bytes
// after it, clamped to the bounds of text. It returns "" when keyword does not
// occur in text. A negative length is treated as 0.
//
// The window is measured in bytes, but a multi-byte UTF-8 character that the
// window would cut in half is dropped from the result, so the returned string
// never starts or ends inside a character.
func getContext(text, keyword string, length int) string {
	index := strings.Index(text, keyword)
	if index == -1 {
		return ""
	}
	if length < 0 {
		length = 0
	}
	start := index - length
	if start < 0 {
		start = 0
	}
	end := index + len(keyword) + length
	if end > len(text) {
		end = len(text)
	}
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx. Skip any at the
	// front of the window: they belong to a character that began before start.
	for start < end && text[start]&0xC0 == 0x80 {
		start++
	}
	// If the byte just past the window is a continuation byte, the window ends
	// inside a character; back up to that character's first byte to exclude it.
	for end > start && end < len(text) && text[end]&0xC0 == 0x80 {
		end--
	}
	return text[start:end]
}
// writeExcel writes data to a new workbook saved as filename. Sheet1 gets a
// header row (school, salesperson, phone, top-up amount) followed by one row
// per entry. Before writing, values are normalised for the report:
//   - school "万象" becomes "万向" and "外国语" becomes "浙外";
//   - amount "未充值" becomes "0";
//   - an empty school, name or amount becomes "?".
//
// The parent directory of filename is created if needed. The function panics
// if the workbook cannot be built or saved, so a failed export is not silent.
func writeExcel(data []ExtractedInfo, filename string) {
	f := excelize.NewFile()
	const sheet = "Sheet1"
	if err := f.SetSheetRow(sheet, "A1", &[]string{"学校", "业务员姓名", "手机号码", "充值金额"}); err != nil {
		panic(fmt.Errorf("write excel header: %w", err))
	}
	for i, d := range data {
		// d is a copy of the element, so the caller's slice is not modified.
		switch d.School {
		case "万象":
			d.School = "万向"
		case "外国语":
			d.School = "浙外"
		case "":
			d.School = "?"
		}
		switch d.Amount {
		case "未充值":
			d.Amount = "0"
		case "":
			d.Amount = "?"
		}
		if d.Name == "" {
			d.Name = "?"
		}
		row := []string{d.School, d.Name, d.Phone, d.Amount}
		// Row 1 holds the header, so data starts at row 2.
		cell, err := excelize.CoordinatesToCellName(1, i+2)
		if err != nil {
			panic(fmt.Errorf("resolve excel cell for row %d: %w", i+2, err))
		}
		if err := f.SetSheetRow(sheet, cell, &row); err != nil {
			panic(fmt.Errorf("write excel row %d: %w", i+2, err))
		}
	}
	if err := os.MkdirAll(filepath.Dir(filename), 0755); err != nil {
		panic(fmt.Errorf("create excel output directory: %w", err))
	}
	if err := f.SaveAs(filename); err != nil {
		panic(fmt.Errorf("save excel %q: %w", filename, err))
	}
}