Tesseract.js 是一个 JavaScript OCR 库，可以识别图像中几乎任何语言的文字，并支持将识别结果导出为 PDF，识别精准度很高。
1. 安装
npm install tesseract.js
2. 示例代码(vue3版)
<template>
  <div class="container">
    <div class="l_box">
      <!-- Preview of the image that will be sent to the OCR worker -->
      <el-image class="c_img" :src="url" fit="contain" />
      <div class="btn_box">
        <el-button type="primary" @click="getImgText" style="margin-right: 10px;" :disabled="loading">解 析</el-button>
        <!--
          No :limit is set on purpose. With :auto-upload="false" the selected file
          stays in el-upload's internal list, so :limit="1" makes every later
          selection hit on-exceed and on-change is never fired again.
        -->
        <el-upload class="upload-demo" :on-change="handleChange" accept=".jpg, .jpeg, .png, .bmp"
          :show-file-list="false" :auto-upload="false">
          <el-button type="primary" :disabled="loading">上 传</el-button>
        </el-upload>
        <el-button type="primary" @click="download" style="margin-left: 10px;" :disabled="loading">下载PDF</el-button>
      </div>
    </div>
    <!-- Recognized text; <pre> keeps the line breaks returned by the OCR engine -->
    <pre class="c_value" v-loading="loading">{{ word }}</pre>
  </div>
</template>
<script setup>
import { ElMessage } from 'element-plus'
import { createWorker } from 'tesseract.js';
// Reactive component state. The bindings themselves are never reassigned
// (only their `.value` is), so they are declared with `const`.

// Source of the image shown in the preview and passed to the OCR worker.
const url = ref('https://tesseract.projectnaptha.com/img/eng_bw.png');
// Text recognized from the current image.
const word = ref('');
// True while a recognition job is running; disables the buttons.
const loading = ref(false);
// Tesseract worker instance, assigned asynchronously by init().
const worker = ref(null);
// PDF data returned by the last successful recognition.
const pdf = ref(null);
// Start creating the OCR worker as soon as the component is mounted.
onMounted(() => {
  init();
});

// Release the OCR worker when the component is destroyed.
onUnmounted(() => {
  // The worker is assigned asynchronously, so it can still be null here
  // (unmounted before creation finished, or creation failed).
  worker.value?.terminate();
});
// Create the Tesseract worker for English and Simplified Chinese and store it
// in `worker`. Failures are logged and reported to the user instead of being
// left as an unhandled promise rejection (init() is called without await).
const init = async () => {
  try {
    worker.value = await createWorker(['eng', 'chi_sim'], 1, {
      logger: (m) => console.log(m),
    });
  } catch (error) {
    console.error(error);
    ElMessage({
      message: '初始化失败',
      type: 'error',
    });
  }
};
// Run OCR on the image at `url`, then store the recognized text in `word`
// and the generated PDF data in `pdf`.
const getImgText = async () => {
  // The worker is created asynchronously, so it may not exist yet.
  if (!worker.value) {
    ElMessage({
      message: '识别引擎尚未就绪，请稍后重试',
      type: 'warning',
    });
    return;
  }
  loading.value = true;
  try {
    const { data } = await worker.value.recognize(url.value, { pdfTitle: 'Example PDF' }, { pdf: true });
    pdf.value = data.pdf;
    word.value = data.text;
  } catch (error) {
    // Keep the underlying cause visible for debugging.
    console.error(error);
    ElMessage({
      message: '解析失败',
      type: 'warning',
    });
  } finally {
    // Always re-enable the buttons, whether recognition succeeded or failed.
    loading.value = false;
  }
};
// el-upload on-change handler: preview the selected file and run OCR on it.
const handleChange = async (file) => {
  const previousUrl = url.value;
  url.value = URL.createObjectURL(file.raw);
  // Release the blob URL of the previously uploaded file so it does not leak.
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  await getImgText();
};
// Download the PDF produced by the most recent successful recognition.
const download = () => {
  // Without a recognition result there is nothing to save; bail out instead
  // of downloading an empty file.
  if (!pdf.value) {
    ElMessage({
      message: '暂无可下载的PDF，请先解析图片',
      type: 'warning',
    });
    return;
  }
  const blob = new Blob([new Uint8Array(pdf.value)], { type: 'application/pdf' });
  // Named blobUrl so it does not shadow the component-level `url` ref.
  const blobUrl = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = blobUrl;
  link.download = 'example.pdf';
  link.click();
  URL.revokeObjectURL(blobUrl);
};
</script>
<style scoped lang="scss">
// Two-column layout: image preview with actions on the left, recognized text on the right.
.container {
  display: flex;
  justify-content: space-between;
  box-sizing: border-box;
  width: 100%;
  height: 100%;
  padding: 20px;

  // Left column: image preview stacked above the button row.
  .l_box {
    width: 48%;
    height: 100%;
    margin-right: 2%;

    .c_img {
      width: 100%;
      // NOTE(review): .btn_box below is 50px tall but only 40px is subtracted
      // here — confirm whether the 10px difference is intended.
      height: calc(100% - 40px);
    }

    // Row holding the parse / upload / download buttons.
    .btn_box {
      display: flex;
      align-items: center;
      height: 50px;
    }
  }

  // Right column: bordered panel showing the recognized text.
  .c_value {
    width: 50%;
    min-height: 500px;
    padding: 20px;
    border: 1px solid #999;
    line-height: 30px;
  }
}
</style>
效果图
更多 API 可以前往官网 tesseract.projectnaptha.com 查看和尝试。