Make a better Chinese character recognition OCR than Tesseract.
See README_en.md for English installation documentation.
Installation on Ubuntu using virtualenv:
git clone https://github.com/JinpengLI/deep_ocr.git ~/deep_ocr
virtualenv ~/deep_ocr_env
source ~/deep_ocr_env/bin/activate
pip install -r ~/deep_ocr/requirements.txt
cd ~/deep_ocr && python setup.py install
source ~/deep_ocr_env/bin/activate && cd ~/deep_ocr && ./bin/deep_ocr_reco data/holiday_notification.jpg -v -d
.
Recognition result with tesseract:
$ tesseract -l chi_sim data/test_data.png out_test_data
v 3 (T) T ' 2t
v (2) T '2'L (3)
:v () v
Recognition result with reco_chars.py (caffe-based), for comparison with tesseract:
$ python reco_chars.py
caffe
Deep Convolutional Network for Handwritten Chinese Character Recognition
http://yuhao.im/files/Zhang_CNNChar.pdf
Using docker (Ubuntu 14.04):
https://www.docker.com/
Download the split archive deep_ocr_workspace.zip and deep_ocr_workspace.z01 (https://pan.baidu.com/s/1nvz2wrB https://pan.baidu.com/s/1qYPKH3Y )
Verify the downloaded files with md5sum:
$ md5sum deep_ocr_workspace.zip
ffeda7ea6604e7b8835c05a33fa0459e deep_ocr_workspace.zip
$ md5sum deep_ocr_workspace.z01
ea66796c2bbdb2bec9b7ee28eb44012d deep_ocr_workspace.z01
(~/deep_ocr_workspace)
cat deep_ocr_workspace.z* > unsplit_deep_ocr_workspace.zip
unzip unsplit_deep_ocr_workspace.zip -d ~/
After unzipping, the deep_ocr workspace is located at ~/deep_ocr_workspace.
docker pull jinpengli/deep_ocr_cpu_docker:latest
Start the docker container:
docker run -ti --volume=${HOME}/deep_ocr_workspace:/workspace jinpengli/deep_ocr_cpu_docker:latest /bin/bash
cd /opt/deep_ocr
git pull origin master
The host workspace is mounted into the container as a volume (at /workspace):
python /opt/deep_ocr/reco_chars.py
========
export WORKSPACE=/workspace
deep_ocr_id_card_reco --img $DEEP_OCR_ROOT/data/id_card_img.jpg --debug_path /tmp/debug --cls_sim ${WORKSPACE}/data/chongdata_caffe_cn_sim_digits_64_64 --cls_ua ${WORKSPACE}/data/chongdata_train_ualpha_digits_64_64
...
ocr res:
============================================================
name
============================================================
address
4
============================================================
month
12
============================================================
minzu
============================================================
year
1654
============================================================
sex
============================================================
id
1X21441114X221243X
============================================================
day
20