diff --git a/cmd/au_sim.go b/cmd/au_sim.go new file mode 100644 index 0000000..75931f0 --- /dev/null +++ b/cmd/au_sim.go @@ -0,0 +1,239 @@ +package cmd + +import ( + "encoding/base64" + "fmt" + "github.com/go-resty/resty/v2" + "github.com/mizuki1412/go-core-kit/class/exception" + "github.com/mizuki1412/go-core-kit/init/initkit" + "github.com/mizuki1412/go-core-kit/service/logkit" + "github.com/spf13/cobra" + "github.com/tidwall/gjson" + "github.com/xuri/excelize/v2" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" + "time" +) + +func init() { + rootCmd.AddCommand(auSimCmd) +} + +var ( + imageDir = "/Users/leo/Documents/au-ocr/image/" + ocrAPIURL = "http://172.16.5.160:8000/ocr/predict-by-base64/" + outputExcelPath = "/Users/leo/Documents/au-ocr/excel/" + + nameWhiteList = []string{"德宁", "鼎斌", "海波", "何晓璇", "胡鑫", "佳欣", "佳怡", "嘉乐", "建闽", "锦城", "景浩", "君豪", "凯彬", "兰青", "李想", "林艳", "刘美云", "裴雅妮", "任子健", "润宇", "隆蝶", "石明毅", "覃彩玉", "唐鑫", "唐宇豪", "童斌", "万兴凯", "王权", "吴宇峰", "武文迪", "夏晨阳", "项乐奇", "小锐", "小颖", "晓雪", "谢俊", "欣萍", "鑫杰", "徐宁", "许慧超", "雅妮", "杨传杰", "杨帅", "杨笑笑", "杨兴俊", "叶琪婷", "宇飞", "玉梅", "张钧帅", "张奕韬", "张玉", "张原硕", "章帅", "赵林冲", "郑佳欣", "朱菲玲", "子健", "邹思惠", "徐林焱", "周志乐", "林焱"} + schoolWhiteList = []string{"财经", "工商", "工业", "杭电", "计量", "金融", "经济", "经贸", "科技", "理工", "美院", "万向", "长征", "浙音", "万象", "特殊教育", "外国语"} + amountWhiteList = []string{"200", "100", "未充值"} +) + +type ExtractedInfo struct { + Name string + School string + Phone string + Amount string +} + +var auSimCmd = &cobra.Command{ + Use: "au", + Short: "Batch processing operations of the autumn semester sim card", + Run: func(cmd *cobra.Command, args []string) { + initkit.BindFlags(cmd) + files, err := ioutil.ReadDir(imageDir) + if err != nil { + panic(exception.New(err.Error())) + } + results := []ExtractedInfo{} + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".jpg") || strings.Contains(file.Name(), "thumb.jpg") || strings.Contains(file.Name(), "hd.jpg") { + continue + } + fullPath := filepath.Join(imageDir, file.Name()) + base64Str, err := imageToBase64(fullPath) + if err != nil { + panic(exception.New("转base64失败:" + err.Error())) + } + ocrText := callOCR(base64Str) + if err != nil { + panic(exception.New("OCR请求失败:" + err.Error())) + } + + info := extractInfoFromOCR(ocrText) + //号码里包含了这些敏感数字 + if info.Phone != "" && (strings.Contains(info.Phone, "200") || strings.Contains(info.Phone, "100") || strings.Contains(info.Phone, "50")) { + newName := "fail/" + file.Name() + ".phone.jpg" + newPath := filepath.Join(imageDir, newName) + os.Rename(fullPath, newPath) + logkit.Info("业务员:" + info.Name + " 学校:" + info.School + " 首充:" + info.Amount + " 号码:" + info.Phone + " 号码里包含充值敏感数字,可能会误识别!") + continue + } + if info.Phone != "" && containsBroadband(ocrText) { + newName := "broadband/" + file.Name() + newPath := filepath.Join(imageDir, newName) + os.Rename(fullPath, newPath) + info.Name = info.Name + "+宽带" + logkit.Info(info.Phone + " √√√加宽带!√√√") + } + oldPath := fullPath + newName := "" + //条件放宽到识别出号码就算成功,剩下不成功的 打?手动 + if info.Phone != "" { + logkit.Info("业务员:" + info.Name + " 学校:" + info.School + " 首充:" + info.Amount + " 号码:" + info.Phone + " 识别成功!") + newName = "success/" + fmt.Sprintf("%s%s.jpg", info.Name, info.Phone) + results = append(results, info) + } else { + logkit.Info("××× 业务员:" + info.Name + " 学校:" + info.School + " 首充:" + info.Amount + " 号码:" + info.Phone + " 识别失败!") + newName = "fail/" + file.Name() + ".error.jpg" + } + + newPath := filepath.Join(imageDir, newName) + os.Rename(oldPath, newPath) + + } + + writeExcel(results, outputExcelPath+"/"+time.Now().Format("20060102150405")+".xlsx") + }, +} + +func containsBroadband(texts []string) bool { + for _, text := range texts { + if strings.Contains(text, "宽带") { + return true + } + } + return false +} + +func imageToBase64(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(data), nil +} + +func callOCR(b64 string) []string { + client := resty.New() + resp, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetBody(map[string]string{ + "base64_str": b64, + }). + Post(ocrAPIURL) + + if err != nil { + panic(exception.New(err.Error())) + } + bodyStr := string(resp.Body()) + // 快速检查 resultcode 是否为 200 + if gjson.Get(bodyStr, "resultcode").Int() != 200 { + panic(exception.New(fmt.Errorf("OCR failed: %s", gjson.Get(bodyStr, "message").String()).Error())) + } + // 使用 GJSON 提取所有文本字段 + var texts []string + textResults := gjson.Get(bodyStr, "data.0") + if !textResults.Exists() { + panic(exception.New(fmt.Errorf("no OCR results found").Error())) + } + textResults.ForEach(func(_, line gjson.Result) bool { + if line.IsArray() && line.Array()[1].IsArray() { + text := line.Array()[1].Array()[0].String() + texts = append(texts, text) + } + return true + }) + return texts +} + +func extractInfoFromOCR(texts []string) ExtractedInfo { + combined := strings.Join(texts, "") + info := ExtractedInfo{} + + // 提取业务员姓名 + for _, name := range nameWhiteList { + if strings.Contains(combined, name) { + info.Name = name + context := getContext(combined, name, 30) + logkit.Info("【上下文 " + context + "】") + // 从上下文中查找学校 + for _, school := range schoolWhiteList { + if strings.Contains(context, school) { + info.School = school + break + } + } + + // 从上下文中查找金额 + for _, amount := range amountWhiteList { + if strings.Contains(context, amount) { + info.Amount = amount + break + } + } + + break + } + } + + // 提取手机号 + re := regexp.MustCompile(`(?:XH[^\x00-\xff]|H势|势).*?(\d{11})`) + match := re.FindStringSubmatch(combined) + if len(match) == 2 { + info.Phone = match[1] + } + + return info +} + +func getContext(text, keyword string, length int) string { + index := strings.Index(text, keyword) + if index == -1 { + return "" + } + start := index - length + if start < 0 { + start = 0 + } + end := index + len(keyword) + length + if end > len(text) { + end = len(text) + } + return text[start:end] +} + +func writeExcel(data []ExtractedInfo, filename string) { + f := excelize.NewFile() + sheet := "Sheet1" + f.SetSheetRow(sheet, "A1", &[]string{"学校", "业务员姓名", "手机号码", "充值金额"}) + + for i, d := range data { + if d.School == "万象" { + d.School = "万向" + } + if d.School == "外国语" { + d.School = "浙外" + } + if d.Amount == "未充值" { + d.Amount = "0" + } + if d.Amount == "" { + d.Amount = "?" + } + if d.Name == "" { + d.Name = "?" + } + if d.School == "" { + d.School = "?" + } + row := []string{d.School, d.Name, d.Phone, d.Amount} + cell, _ := excelize.CoordinatesToCellName(1, i+2) + f.SetSheetRow(sheet, cell, &row) + } + + f.SaveAs(filename) +} diff --git a/go.mod b/go.mod index 4b139be..7a1b1b7 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,6 @@ require ( github.com/pelletier/go-toml v1.9.4 // indirect github.com/richardlehane/mscfb v1.0.4 // indirect github.com/richardlehane/msoleps v1.0.3 // indirect - github.com/robfig/cron/v3 v3.0.1 // indirect github.com/rogpeppe/go-internal v1.8.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/spf13/afero v1.8.0 // indirect @@ -41,7 +40,6 @@ require ( github.com/xuri/efp v0.0.0-20240408161823-9ad904a10d6d // indirect github.com/xuri/nfp v0.0.0-20240318013403-ab9948c2c4a7 // indirect go.uber.org/atomic v1.9.0 // indirect - go.uber.org/automaxprocs v1.4.0 // indirect go.uber.org/multierr v1.7.0 // indirect go.uber.org/zap v1.20.0 // indirect golang.org/x/crypto v0.25.0 // indirect diff --git a/go.sum b/go.sum index 8cc9b1b..c23f659 100644 --- a/go.sum +++ b/go.sum @@ -333,8 +333,6 @@ github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7 github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= -github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= -github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= @@ -408,8 +406,6 @@ go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqe go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/automaxprocs v1.4.0 h1:CpDZl6aOlLhReez+8S3eEotD7Jx0Os++lemPlMULQP0= -go.uber.org/automaxprocs v1.4.0/go.mod h1:/mTEdr7LvHhs0v7mjdxDreTz1OG5zdZGqgOnhWiR/+Q= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=