|
|
|
@ -23,6 +23,8 @@ func init() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
var (
|
|
|
|
var (
|
|
|
|
|
|
|
|
apiKey = "oEr6D60u6mmJRPlAHEIx8dWN"
|
|
|
|
|
|
|
|
secretKey = "r7fYVHExzsqlYO9P2kgdvs5N1WVJL8vB"
|
|
|
|
imageDir = "/Users/leo/Documents/au-ocr/image/"
|
|
|
|
imageDir = "/Users/leo/Documents/au-ocr/image/"
|
|
|
|
ocrAPIURL = "http://127.0.0.1:5005/ocr"
|
|
|
|
ocrAPIURL = "http://127.0.0.1:5005/ocr"
|
|
|
|
outputExcelPath = "/Users/leo/Documents/au-ocr/excel/"
|
|
|
|
outputExcelPath = "/Users/leo/Documents/au-ocr/excel/"
|
|
|
|
@ -48,6 +50,7 @@ var auSimCmd = &cobra.Command{
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
panic(exception.New(err.Error()))
|
|
|
|
panic(exception.New(err.Error()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tokenBaidu := getBaiduAccessToken()
|
|
|
|
results := []ExtractedInfo{}
|
|
|
|
results := []ExtractedInfo{}
|
|
|
|
for _, file := range files {
|
|
|
|
for _, file := range files {
|
|
|
|
if file.IsDir() || !strings.HasSuffix(file.Name(), ".jpg") || strings.Contains(file.Name(), "thumb.jpg") || strings.Contains(file.Name(), "hd.jpg") {
|
|
|
|
if file.IsDir() || !strings.HasSuffix(file.Name(), ".jpg") || strings.Contains(file.Name(), "thumb.jpg") || strings.Contains(file.Name(), "hd.jpg") {
|
|
|
|
@ -58,12 +61,32 @@ var auSimCmd = &cobra.Command{
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
panic(exception.New("转base64失败:" + err.Error()))
|
|
|
|
panic(exception.New("转base64失败:" + err.Error()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//第一次调用本地的
|
|
|
|
ocrText := callOCR(base64Str)
|
|
|
|
ocrText := callOCR(base64Str)
|
|
|
|
|
|
|
|
logkit.Info("成功调用本地OCR")
|
|
|
|
if err != nil {
|
|
|
|
if err != nil {
|
|
|
|
panic(exception.New("OCR请求失败:" + err.Error()))
|
|
|
|
panic(exception.New("OCR请求失败:" + err.Error()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
info := extractInfoFromOCR(ocrText)
|
|
|
|
info := extractInfoFromOCR(ocrText)
|
|
|
|
|
|
|
|
//只要返回结果中存在空值的,就再调用另外的接口
|
|
|
|
|
|
|
|
if info.Name == "" || info.Phone == "" || info.School == "" || info.Amount == "" {
|
|
|
|
|
|
|
|
ocrTextBaidu := callOCRBaidu(base64Str, tokenBaidu)
|
|
|
|
|
|
|
|
logkit.Info("¥成功调用百度OCR")
|
|
|
|
|
|
|
|
infoBaidu := extractInfoFromOCR(ocrTextBaidu)
|
|
|
|
|
|
|
|
//合并
|
|
|
|
|
|
|
|
if info.Name == "" {
|
|
|
|
|
|
|
|
info.Name = infoBaidu.Name
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.Phone == "" {
|
|
|
|
|
|
|
|
info.Phone = infoBaidu.Phone
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.School == "" {
|
|
|
|
|
|
|
|
info.School = infoBaidu.School
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.Amount == "" {
|
|
|
|
|
|
|
|
info.Amount = infoBaidu.Amount
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
//号码里包含了这些敏感数字
|
|
|
|
//号码里包含了这些敏感数字
|
|
|
|
if info.Phone != "" && (strings.Contains(info.Phone, "200") || strings.Contains(info.Phone, "100") || strings.Contains(info.Phone, "50")) {
|
|
|
|
if info.Phone != "" && (strings.Contains(info.Phone, "200") || strings.Contains(info.Phone, "100") || strings.Contains(info.Phone, "50")) {
|
|
|
|
if info.Name != "" {
|
|
|
|
if info.Name != "" {
|
|
|
|
@ -143,7 +166,6 @@ func callOCR(b64 string) []string {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bodyStr := string(resp.Body())
|
|
|
|
bodyStr := string(resp.Body())
|
|
|
|
// 快速检查 resultcode 是否为 200
|
|
|
|
// 快速检查 resultcode 是否为 200
|
|
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode() != 200 {
|
|
|
|
if resp.StatusCode() != 200 {
|
|
|
|
panic(exception.New(fmt.Errorf("OCR failed: %s", gjson.Get(bodyStr, "message").String()).Error()))
|
|
|
|
panic(exception.New(fmt.Errorf("OCR failed: %s", gjson.Get(bodyStr, "message").String()).Error()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
@ -168,11 +190,10 @@ func callOCR(b64 string) []string {
|
|
|
|
func extractInfoFromOCR(texts []string) ExtractedInfo {
|
|
|
|
func extractInfoFromOCR(texts []string) ExtractedInfo {
|
|
|
|
combined := strings.Join(texts, "")
|
|
|
|
combined := strings.Join(texts, "")
|
|
|
|
info := ExtractedInfo{}
|
|
|
|
info := ExtractedInfo{}
|
|
|
|
|
|
|
|
|
|
|
|
// 提取手机号
|
|
|
|
// 提取手机号
|
|
|
|
re := regexp.MustCompile(`(?:XH[^\x00-\xff]|H势|势).*?(\d{11})`)
|
|
|
|
re := regexp.MustCompile(`(?:XH[^\x00-\xff]|H势|势|XH).*?(\d{11})`)
|
|
|
|
match := re.FindStringSubmatch(combined)
|
|
|
|
match := re.FindStringSubmatch(combined)
|
|
|
|
if len(match) == 2 {
|
|
|
|
if len(match) >= 2 {
|
|
|
|
info.Phone = match[1]
|
|
|
|
info.Phone = match[1]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@ -243,7 +264,7 @@ func extractInfoFromOCR(texts []string) ExtractedInfo {
|
|
|
|
if info.Amount == "" {
|
|
|
|
if info.Amount == "" {
|
|
|
|
re2 := regexp.MustCompile(`充值[::]?\s*(100|200|50)\b`)
|
|
|
|
re2 := regexp.MustCompile(`充值[::]?\s*(100|200|50)\b`)
|
|
|
|
match2 := re2.FindStringSubmatch(combined)
|
|
|
|
match2 := re2.FindStringSubmatch(combined)
|
|
|
|
if len(match2) == 2 {
|
|
|
|
if len(match2) >= 2 {
|
|
|
|
info.Amount = match2[1]
|
|
|
|
info.Amount = match2[1]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
@ -264,13 +285,19 @@ func extractInfoFromOCR(texts []string) ExtractedInfo {
|
|
|
|
"2004", // 不匹配
|
|
|
|
"2004", // 不匹配
|
|
|
|
"充值200", // 匹配
|
|
|
|
"充值200", // 匹配
|
|
|
|
"充值2004", // 不匹配
|
|
|
|
"充值2004", // 不匹配
|
|
|
|
"abc100xyz", // 不匹配
|
|
|
|
"abc100xyz", // 匹配
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
if info.Amount == "" {
|
|
|
|
if info.Amount == "" {
|
|
|
|
re3 := regexp.MustCompile(`\b(100|200|50)\b`)
|
|
|
|
re3 := regexp.MustCompile(`100|200|50`)
|
|
|
|
match3 := re3.FindStringSubmatch(combined)
|
|
|
|
allMatches := re3.FindAllStringIndex(combined, -1)
|
|
|
|
if len(match3) == 2 {
|
|
|
|
for _, loc := range allMatches {
|
|
|
|
info.Amount = match3[1]
|
|
|
|
start, end := loc[0], loc[1]
|
|
|
|
|
|
|
|
beforeOK := start == 0 || (combined[start-1] < '0' || combined[start-1] > '9')
|
|
|
|
|
|
|
|
afterOK := end == len(combined) || (combined[end] < '0' || combined[end] > '9')
|
|
|
|
|
|
|
|
if beforeOK && afterOK {
|
|
|
|
|
|
|
|
info.Amount = combined[start:end]
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@ -327,3 +354,57 @@ func writeExcel(data []ExtractedInfo, filename string) {
|
|
|
|
|
|
|
|
|
|
|
|
f.SaveAs(filename)
|
|
|
|
f.SaveAs(filename)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func getBaiduAccessToken() string {
|
|
|
|
|
|
|
|
client := resty.New()
|
|
|
|
|
|
|
|
url := "https://aip.baidubce.com/oauth/2.0/token"
|
|
|
|
|
|
|
|
data := map[string]string{
|
|
|
|
|
|
|
|
"grant_type": "client_credentials",
|
|
|
|
|
|
|
|
"client_id": apiKey,
|
|
|
|
|
|
|
|
"client_secret": secretKey,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := client.R().
|
|
|
|
|
|
|
|
SetHeader("Content-Type", "application/x-www-form-urlencoded").
|
|
|
|
|
|
|
|
SetFormData(data).
|
|
|
|
|
|
|
|
Post(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
panic(exception.New("获取AccessToken失败:" + err.Error()))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
accessToken := gjson.Get(resp.String(), "access_token").String()
|
|
|
|
|
|
|
|
return accessToken
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func callOCRBaidu(b64, token string) []string {
|
|
|
|
|
|
|
|
url := "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + token
|
|
|
|
|
|
|
|
// 构造表单数据
|
|
|
|
|
|
|
|
payload := map[string]string{
|
|
|
|
|
|
|
|
"image": b64,
|
|
|
|
|
|
|
|
"multidirectional_recognize": "true",
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
client := resty.New()
|
|
|
|
|
|
|
|
resp, err := client.R().
|
|
|
|
|
|
|
|
SetHeader("Content-Type", "application/x-www-form-urlencoded").
|
|
|
|
|
|
|
|
SetHeader("Accept", "application/json").
|
|
|
|
|
|
|
|
SetFormData(payload).
|
|
|
|
|
|
|
|
Post(url)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
|
|
panic(exception.New("请求百度OCR出错:" + err.Error()))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 使用 gjson 解析结果
|
|
|
|
|
|
|
|
result := resp.String()
|
|
|
|
|
|
|
|
var texts []string
|
|
|
|
|
|
|
|
textResults := gjson.Get(result, "words_result")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if !textResults.Exists() || !textResults.IsArray() {
|
|
|
|
|
|
|
|
panic(exception.New(fmt.Errorf("no OCR results found").Error()))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
textResults.ForEach(func(_, value gjson.Result) bool {
|
|
|
|
|
|
|
|
text := value.Get("words").String()
|
|
|
|
|
|
|
|
if text != "" {
|
|
|
|
|
|
|
|
texts = append(texts, text)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
return texts
|
|
|
|
|
|
|
|
}
|
|
|
|
|