54. Simplifying the find_words Method with Array()

1. Introduction

When writing Ruby code, making sure a method can handle inputs of various types is an important skill. In particular, when a method may receive a string, a single element, or an array, the Array() conversion function can greatly simplify the method's logic and make the code more robust. This article examines how to use Array() in a #find_words method so that the method behaves consistently no matter what kind of input it is given.

2. Understanding the Array() Conversion Function

Array() is a built-in Ruby conversion function that turns inputs of different types into an array. Whether the input is a string, a single element, or already an array, Array() guarantees that the result is an array. Its common conversions are listed below (see the irb sketch after the list):

  • A string: Array("hello") -> ["hello"]
  • A single element: Array(42) -> [42]
  • An array: Array([1, 2, 3]) -> [1, 2, 3]
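These conversions can be checked directly in irb. Below is a small demonstration; the nil and Hash lines are additional facts about Array() that the list above does not mention, included here because they matter in practice:

  # Array() normalizes any input into an array.
  Array("hello")     # => ["hello"]
  Array(42)          # => [42]
  Array([1, 2, 3])   # => [1, 2, 3]

  # Two extra behaviors worth remembering:
  Array(nil)         # => []        -- nil becomes an empty array, not [nil]
  Array({a: 1})      # => [[:a, 1]] -- a Hash is converted to key/value pairs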

2.1 Advantages of Using Array()

By using Array(), a method can normalize all of its inputs to an array at the very top of its body. This removes the explicit type checks that would otherwise be needed (is the argument a string? a single element? an array? nil?), lets the rest of the method work with a single uniform shape, and makes the method behave consistently no matter how it is called, as sketched below.
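The article's own implementation of #find_words is not reproduced here, so the following is a minimal sketch of the pattern under discussion. Only the method name #find_words comes from the article; the parameters (text, words) and the substring-matching logic are illustrative assumptions:

  # Returns the subset of `words` that appear in `text`.
  # Array(words) lets callers pass a single word, a list of
  # words, or nil without any type-checking branches.
  def find_words(text, words)
    Array(words).select { |word| text.include?(word) }
  end

  find_words("the cat sat", "cat")           # => ["cat"]
  find_words("the cat sat", ["cat", "dog"])  # => ["cat"]
  find_words("the cat sat", nil)             # => []

Without Array(), this method would need an explicit words.is_a?(Array) branch or a splat parameter; with it, every input shape flows through the same one-line body. One caveat: Array("cat dog") yields ["cat dog"], not ["cat", "dog"], so a multi-word string is treated as a single search term.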
