银行账单语音播报助手
一、实际应用场景与痛点
应用场景
视障用户陈先生需要每月核对银行账单,了解自己的收支情况。传统的银行账单通常是纸质或PDF格式,包含复杂的表格、数字和术语,视障用户完全无法独立阅读。他每次都需要请家人或朋友帮忙,既麻烦,又容易泄露个人财务隐私。有一次,他不小心错过了信用卡还款日,产生了不必要的利息费用。他急需一个能自动读取银行账单并语音播报收支情况的智能助手。
核心痛点
1. 账单格式复杂:PDF/Excel格式难以访问
2. 数字信息密集:金额、日期、账号等数字信息多
3. 专业术语难懂:银行术语对视障用户不友好
4. 隐私泄露风险:需他人协助查看财务信息
5. 实时性差:无法及时了解账户变动
6. 操作复杂:银行APP的无障碍支持不足
7. 错误风险:手动输入或记忆容易出错
二、核心逻辑设计
1. 输入银行账单文件(PDF/Excel/图片/短信)
2. OCR识别或直接提取文本内容
3. 智能解析账单结构
4. 提取关键信息(余额、交易记录、摘要)
5. 数据清洗和格式化
6. 生成易懂的语音播报内容
7. 语音合成播报
8. 提供问答和查询功能
9. 数据分析和趋势预测
10. 安全存储和隐私保护
三、模块化代码实现
主程序文件:bank_statement_voice_assistant.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
银行账单语音播报助手
将银行账单转换为语音播报,帮助视障用户了解收支情况
版本:3.0.0
作者:无障碍智能助手
"""
import os
import sys
import re
import json
import csv
import sqlite3
import threading
import queue
import hashlib
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional, Any, Union
from dataclasses import dataclass, asdict, field
from enum import Enum
import warnings
warnings.filterwarnings('ignore')
# 图像处理和OCR
try:
import cv2
import numpy as np
import pytesseract
from PIL import Image, ImageEnhance
CV2_AVAILABLE = True
TESSERACT_AVAILABLE = True
except ImportError:
CV2_AVAILABLE = False
TESSERACT_AVAILABLE = False
print("警告: 图像处理库未安装")
# PDF处理
try:
import PyPDF2
from pdf2image import convert_from_bytes
PDF_AVAILABLE = True
except ImportError:
PDF_AVAILABLE = False
print("警告: PDF处理库未安装")
# Excel处理
try:
import pandas as pd
import openpyxl
PANDAS_AVAILABLE = True
except ImportError:
PANDAS_AVAILABLE = False
print("警告: 数据处理库未安装")
# 语音合成
try:
import pyttsx3
TTS_AVAILABLE = True
except ImportError:
TTS_AVAILABLE = False
print("警告: 语音合成库未安装")
# 音频处理
try:
import simpleaudio as sa
import wave
AUDIO_AVAILABLE = True
except ImportError:
AUDIO_AVAILABLE = False
print("警告: 音频处理库未安装")
# 数据可视化(辅助功能)
try:
import matplotlib.pyplot as plt
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class BankType(Enum):
    """Enumeration of bank types; each value is a lowercase short code."""

    # Industrial and Commercial Bank of China
    ICBC = "icbc"
    # China Construction Bank
    CCB = "ccb"
    # Agricultural Bank of China
    ABC = "abc"
    # Bank of China
    BOC = "boc"
    # China Merchants Bank
    CMB = "cmb"
    # Bank of Communications
    BCM = "bcm"
    # China Everbright Bank
    CEB = "ceb"
    # Industrial Bank
    CIB = "cib"
    # Ping An Bank
    PINGAN = "pingan"
    # China CITIC Bank
    CITIC = "citic"
    # Shanghai Pudong Development Bank
    SPDB = "spdb"
    # Any other bank
    OTHER = "other"
class TransactionType(Enum):
    """Enumeration of transaction types."""

    # Income
    INCOME = "income"
    # Expense
    EXPENSE = "expense"
    # Transfer into the account
    TRANSFER_IN = "transfer_in"
    # Transfer out of the account
    TRANSFER_OUT = "transfer_out"
    # Cash withdrawal
    WITHDRAWAL = "withdrawal"
    # Deposit
    DEPOSIT = "deposit"
    # Service fee
    FEE = "fee"
    # Interest
    INTEREST = "interest"
    # Refund
    REFUND = "refund"
    # Unknown
    UNKNOWN = "unknown"
class AccountType(Enum):
    """Enumeration of account types."""

    # Savings account
    SAVINGS = "savings"
    # Checking account
    CHECKING = "checking"
    # Credit card
    CREDIT = "credit"
    # Loan account
    LOAN = "loan"
    # Investment account
    INVESTMENT = "investment"
    # Anything else
    OTHER = "other"
@dataclass
class Transaction:
    """A single transaction record."""

    id: str
    date: datetime
    description: str
    amount: float
    currency: str
    transaction_type: TransactionType
    category: str
    balance_after: Optional[float] = None
    account_name: Optional[str] = None
    account_number: Optional[str] = None
    counterparty: Optional[str] = None
    reference: Optional[str] = None
    location: Optional[str] = None
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return the record as a plain dictionary.

        ``date`` is rendered with ``isoformat()`` and ``transaction_type``
        as its enum value; every other field is passed through unchanged.
        """
        return dict(
            id=self.id,
            date=self.date.isoformat(),
            description=self.description,
            amount=self.amount,
            currency=self.currency,
            transaction_type=self.transaction_type.value,
            category=self.category,
            balance_after=self.balance_after,
            account_name=self.account_name,
            account_number=self.account_number,
            counterparty=self.counterparty,
            reference=self.reference,
            location=self.location,
            tags=self.tags,
        )

    @property
    def is_income(self) -> bool:
        """True when the type is INCOME, TRANSFER_IN or REFUND."""
        income_kinds = (
            TransactionType.INCOME,
            TransactionType.TRANSFER_IN,
            TransactionType.REFUND,
        )
        return self.transaction_type in income_kinds

    @property
    def is_expense(self) -> bool:
        """True when the type is EXPENSE, TRANSFER_OUT, WITHDRAWAL or FEE."""
        expense_kinds = (
            TransactionType.EXPENSE,
            TransactionType.TRANSFER_OUT,
            TransactionType.WITHDRAWAL,
            TransactionType.FEE,
        )
        return self.transaction_type in expense_kinds
@dataclass
class BankAccount:
    """A bank account and its current balance information."""

    id: str
    bank_type: BankType
    account_type: AccountType
    account_number: str
    account_name: str
    balance: float
    currency: str
    last_updated: datetime
    credit_limit: Optional[float] = None
    available_credit: Optional[float] = None
    due_date: Optional[datetime] = None
    minimum_payment: Optional[float] = None

    def to_dict(self) -> Dict:
        """Return the account as a plain dictionary.

        Enum fields are rendered as their values and datetimes with
        ``isoformat()``; a missing ``due_date`` stays ``None``.
        """
        due_date_text = None
        if self.due_date:
            due_date_text = self.due_date.isoformat()

        return dict(
            id=self.id,
            bank_type=self.bank_type.value,
            account_type=self.account_type.value,
            account_number=self.account_number,
            account_name=self.account_name,
            balance=self.balance,
            currency=self.currency,
            last_updated=self.last_updated.isoformat(),
            credit_limit=self.credit_limit,
            available_credit=self.available_credit,
            due_date=due_date_text,
            minimum_payment=self.minimum_payment,
        )
@dataclass
class BankStatement:
    """A bank statement: one account, one period, and its transactions."""

    id: str
    account: BankAccount
    period_start: datetime
    period_end: datetime
    transactions: List[Transaction]
    opening_balance: float
    closing_balance: float
    total_income: float
    total_expense: float
    file_path: Optional[str] = None
    # Defaults to the moment the instance is created.
    parsed_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict:
        """Return the statement as a plain dictionary.

        The account and every transaction are converted with their own
        ``to_dict()``; datetimes are rendered with ``isoformat()``.
        """
        return {
            'id': self.id,
            'account': self.account.to_dict(),
            'period_start': self.period_start.isoformat(),
            'period_end': self.period_end.isoformat(),
            'transactions': [t.to_dict() for t in self.transactions],
            'opening_balance': self.opening_balance,
            'closing_balance': self.closing_balance,
            'total_income': self.total_income,
            'total_expense': self.total_expense,
            'file_path': self.file_path,
            'parsed_at': self.parsed_at.isoformat()
        }

    def get_transactions_by_date(self, date: datetime) -> List[Transaction]:
        """Return the transactions that happened on the given calendar day.

        Args:
            date: The day to look up. A ``datetime`` is reduced to its date
                part; a plain ``datetime.date`` is accepted as well.
        """
        # datetime is a subclass of date, so only datetimes need reducing.
        target_day = date.date() if isinstance(date, datetime) else date
        return [t for t in self.transactions if t.date.date() == target_day]

    def get_transactions_by_type(self, trans_type: TransactionType) -> List[Transaction]:
        """Return the transactions whose type equals ``trans_type``."""
        return [t for t in self.transactions
                if t.transaction_type == trans_type]

    def get_transactions_by_category(self, category: str) -> List[Transaction]:
        """Return the transactions whose category equals ``category``."""
        return [t for t in self.transactions
                if t.category == category]

    def get_top_expenses(self, limit: int = 10) -> List[Transaction]:
        """Return up to ``limit`` expense transactions, largest first.

        Ranking uses the absolute amount so that the result is correct
        whether expenses are stored as positive or as negative numbers.
        """
        expenses = [t for t in self.transactions if t.is_expense]
        return sorted(expenses, key=lambda t: abs(t.amount), reverse=True)[:limit]

    def get_top_incomes(self, limit: int = 10) -> List[Transaction]:
        """Return up to ``limit`` income transactions, largest first.

        Ranking uses the absolute amount, mirroring ``get_top_expenses``.
        """
        incomes = [t for t in self.transactions if t.is_income]
        return sorted(incomes, key=lambda t: abs(t.amount), reverse=True)[:limit]
class DocumentExtractor:
    """Extract text and metadata from statement files.

    Handles plain text, PDF (text layer with an OCR fallback per page),
    images (OCR) and spreadsheets. Results are cached in memory on the
    instance, keyed by the MD5 digest of the file content.
    """

    # Encodings tried, in this order, when reading a text file.
    _TEXT_ENCODINGS = ('utf-8', 'gbk', 'gb2312', 'big5', 'latin-1')
    # Number of bytes read per step while hashing a file.
    _HASH_CHUNK_SIZE = 65536

    def __init__(self, config: Dict):
        """Initialise the extractor.

        Args:
            config: Extractor configuration. The optional ``'ocr'`` entry may
                hold ``tesseract_path``, ``language`` and
                ``tesseract_config``.
        """
        self.config = config
        # OCR settings
        self.ocr_config = config.get('ocr', {})
        if TESSERACT_AVAILABLE:
            # Point pytesseract at a custom binary when one is configured.
            tesseract_path = self.ocr_config.get('tesseract_path')
            if tesseract_path and os.path.exists(tesseract_path):
                pytesseract.pytesseract.tesseract_cmd = tesseract_path
        # Maps file hash -> (text, metadata)
        self.extraction_cache = {}

    def extract_text(self, filepath: str, use_cache: bool = True) -> Tuple[str, Dict]:
        """Extract the text of a document, dispatching on its extension.

        Args:
            filepath: Path of the file to read.
            use_cache: Reuse / store results in the in-memory cache.

        Returns:
            A ``(text, metadata)`` tuple.

        Raises:
            FileNotFoundError: ``filepath`` does not exist.
            ValueError: The extension is not supported, or extraction failed.
            ImportError: The library needed for this format is missing.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"文件不存在: {filepath}")

        # The hash is only needed for cache lookups and stores.
        cache_key = self._get_file_hash(filepath) if use_cache else None
        if use_cache and cache_key in self.extraction_cache:
            print(f"使用缓存结果: {filepath}")
            return self.extraction_cache[cache_key]

        ext = os.path.splitext(filepath)[1].lower()
        if ext == '.txt':
            text, metadata = self.extract_txt(filepath)
        elif ext == '.pdf':
            text, metadata = self.extract_pdf(filepath)
        elif ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
            text, metadata = self.extract_image(filepath)
        elif ext in ['.xlsx', '.xls', '.csv']:
            text, metadata = self.extract_spreadsheet(filepath)
        else:
            raise ValueError(f"不支持的格式: {ext}")

        if use_cache:
            self.extraction_cache[cache_key] = (text, metadata)
        return text, metadata

    def extract_txt(self, filepath: str) -> Tuple[str, Dict]:
        """Read a text file, trying several encodings in turn.

        Only decoding failures move on to the next encoding; I/O errors
        propagate to the caller instead of being silently swallowed.

        Returns:
            ``(content, metadata)`` where metadata records the encoding that
            succeeded plus character and line counts.

        Raises:
            ValueError: None of the candidate encodings could decode the file.
        """
        for encoding in self._TEXT_ENCODINGS:
            try:
                with open(filepath, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            metadata = {
                'format': 'txt',
                'encoding': encoding,
                'size': len(content),
                'chars': len(content),
                'lines': content.count('\n') + 1
            }
            return content, metadata
        raise ValueError("无法解码文本文件")

    def extract_pdf(self, filepath: str) -> Tuple[str, Dict]:
        """Extract text from a PDF, falling back to OCR for sparse pages.

        A page whose text layer has 10 or fewer non-blank characters is
        rendered to an image and passed through OCR. A failure on a single
        page is reported and skipped so the remaining pages still get read.

        Raises:
            ImportError: The PDF libraries are not installed.
            ValueError: The PDF could not be processed at all.
        """
        if not PDF_AVAILABLE:
            raise ImportError("PDF处理库未安装")
        try:
            page_chunks = []
            metadata = {}
            with open(filepath, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)
                page_count = len(pdf_reader.pages)
                # Document information dictionary, when the PDF has one.
                info = pdf_reader.metadata
                if info:
                    metadata = {
                        'title': info.get('/Title', ''),
                        'author': info.get('/Author', ''),
                        'subject': info.get('/Subject', ''),
                        'creator': info.get('/Creator', ''),
                        'producer': info.get('/Producer', ''),
                    }
                for page_num, page in enumerate(pdf_reader.pages):
                    try:
                        page_text = page.extract_text()
                        if page_text and len(page_text.strip()) > 10:
                            page_chunks.append(page_text + "\n\n")
                        else:
                            # Too little text: probably a scan, so use OCR.
                            print(f"第{page_num+1}页文本过少,尝试OCR...")
                            page_image = self._pdf_page_to_image(f, page_num)
                            if page_image is not None:
                                ocr_text = self._ocr_image(page_image)
                                page_chunks.append(ocr_text + "\n\n")
                    except Exception as e:
                        # Best effort: keep going with the other pages.
                        print(f"提取第{page_num+1}页失败: {e}")
            text = "".join(page_chunks)
            metadata.update({
                'pages': page_count,
                'format': 'pdf',
                'chars': len(text),
                'lines': text.count('\n') + 1,
                'extraction_method': 'text+ocr'
            })
            return text, metadata
        except Exception as e:
            raise ValueError(f"PDF提取失败: {e}") from e

    def extract_image(self, filepath: str) -> Tuple[str, Dict]:
        """Run OCR on an image file after preprocessing it.

        Raises:
            ImportError: The image / OCR libraries are not installed.
            ValueError: The image could not be read or recognised.
        """
        if not (CV2_AVAILABLE and TESSERACT_AVAILABLE):
            raise ImportError("图像处理库未安装")
        try:
            image = cv2.imread(filepath)
            if image is None:
                raise ValueError(f"无法读取图片: {filepath}")
            processed = self._preprocess_image(image)
            text = pytesseract.image_to_string(
                processed,
                lang=self.ocr_config.get('language', 'chi_sim+eng'),
                config=self.ocr_config.get('tesseract_config', '')
            )
            metadata = {
                'format': os.path.splitext(filepath)[1][1:],
                'width': image.shape[1],
                'height': image.shape[0],
                'channels': image.shape[2] if len(image.shape) > 2 else 1,
                'chars': len(text),
                'lines': text.count('\n') + 1
            }
            return text, metadata
        except Exception as e:
            raise ValueError(f"图片提取失败: {e}") from e

    def extract_spreadsheet(self, filepath: str) -> Tuple[str, Dict]:
        """Flatten a CSV / Excel sheet into one line of text per row.

        The first output line lists the column headers; each following line
        lists the non-missing ``column: value`` pairs of one row.

        Raises:
            ImportError: The data-processing libraries are not installed.
            ValueError: The file could not be read or has an unsupported
                extension.
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("数据处理库未安装")
        try:
            ext = os.path.splitext(filepath)[1].lower()
            if ext == '.csv':
                df = pd.read_csv(filepath, encoding='utf-8')
            elif ext in ['.xlsx', '.xls']:
                df = pd.read_excel(filepath)
            else:
                raise ValueError(f"不支持的表格格式: {ext}")

            # Headers are stringified because column labels may be numbers.
            headers = [str(col) for col in df.columns]
            text_lines = ["表格标题: " + ", ".join(headers)]
            for idx, row in df.iterrows():
                row_text = []
                for col in df.columns:
                    value = row[col]
                    if pd.notna(value):
                        row_text.append(f"{col}: {value}")
                if row_text:
                    text_lines.append(f"第{idx+1}行: " + "; ".join(row_text))

            text = "\n".join(text_lines)
            metadata = {
                'format': ext[1:],
                'rows': len(df),
                'columns': len(df.columns),
                'chars': len(text),
                'lines': len(text_lines)
            }
            return text, metadata
        except Exception as e:
            raise ValueError(f"表格提取失败: {e}") from e

    # NOTE: the annotations below are strings on purpose. ``np`` and ``Image``
    # come from optional imports, and an evaluated annotation would raise
    # NameError at class-definition time when those libraries are missing.
    def _preprocess_image(self, image: "np.ndarray") -> "np.ndarray":
        """Prepare an image for OCR.

        Steps: grayscale conversion, denoising, Otsu binarisation and a
        morphological close with a 2x2 kernel.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        denoised = cv2.fastNlMeansDenoising(gray)
        _, binary = cv2.threshold(
            denoised,
            0, 255,
            cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        kernel = np.ones((2, 2), np.uint8)
        processed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
        return processed

    def _pdf_page_to_image(self, pdf_file, page_num: int) -> Optional["Image.Image"]:
        """Render one page of an open PDF file to an image.

        Args:
            pdf_file: Binary file object of the PDF.
            page_num: Zero-based page index.

        Returns:
            The rendered page, or ``None`` when rendering fails.
        """
        try:
            from pdf2image import convert_from_bytes
            # The PDF reader shares this handle and moves its position, so
            # rewind to get the whole document and restore the position after.
            position = pdf_file.tell()
            try:
                pdf_file.seek(0)
                pdf_data = pdf_file.read()
            finally:
                pdf_file.seek(position)
            images = convert_from_bytes(pdf_data, first_page=page_num+1, last_page=page_num+1)
            return images[0] if images else None
        except Exception as e:
            print(f"PDF转图片失败: {e}")
            return None

    def _ocr_image(self, image: "Image.Image") -> str:
        """Run OCR on an image after doubling its contrast.

        Returns an empty string when Tesseract is unavailable.
        """
        if not TESSERACT_AVAILABLE:
            return ""
        enhancer = ImageEnhance.Contrast(image)
        enhanced = enhancer.enhance(2.0)
        text = pytesseract.image_to_string(
            enhanced,
            lang=self.ocr_config.get('language', 'chi_sim+eng'),
            config=self.ocr_config.get('tesseract_config', '--psm 6')
        )
        return text

    def _get_file_hash(self, filepath: str) -> str:
        """Return the MD5 hex digest of the file content (cache key only).

        The file is hashed in chunks so large files are not loaded whole.
        """
        digest = hashlib.md5()
        with open(filepath, 'rb') as f:
            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()
class BankStatementParser:
"""银行对账单解析器"""
def __init__(self, config: Dict):
    """Set up the parser.

    Stores the configuration, then loads the bank recognition patterns
    and the category mapping through the instance's own loader methods.

    Args:
        config: Parser configuration.
    """
    self.config = config
    self.bank_patterns = self.load_bank_patterns()
    self.category_mapping = self.load_category_mapping()
def load_bank_patterns(self) -> Dict[BankType, Dict]:
    """Load the bank recognition patterns.

    Reads ``config/bank_patterns.json`` when it exists. Otherwise the
    defaults from ``get_default_patterns()`` are written to that path
    (creating the directory if needed) and returned. Any error along the
    way is reported and the defaults are returned instead.
    """
    patterns_path = "config/bank_patterns.json"
    default_patterns = self.get_default_patterns()
    try:
        if os.path.exists(patterns_path):
            with open(patterns_path, 'r', encoding='utf-8') as f:
                return json.load(f)

        # No pattern file yet: persist the defaults for later runs.
        os.makedirs(os.path.dirname(patterns_path), exist_ok=True)
        with open(patterns_path, 'w', encoding='utf-8') as f:
            json.dump(default_patterns, f, indent=2, ensure_ascii=False)
        return default_patterns
    except Exception as e:
        print(f"加载银行模式失败,使用默认模式: {e}")
        return default_patterns
def get_default_patterns(self) -> Dict:
"""获取默认银行模式"""
return {
"icbc": {
"name_keywords": ["工商银行", "ICBC"],
"account_pattern": r"账号[::]\s*(\d{16,19})",
"balance_pattern": r"余额[::]\s*([\d,]+\.?\d*)",
"date_pattern": r"日期[::]\s*(\d{4}[-/]\d{1,2}[-/]\d{1,2})",
"transaction_start": r"交易明细|流水记录",
"transaction_line": r"(\d{4}[-/]\d{1,2}[-/]\d{1,2})\s+([^\d\n]+?)\s+([\d,]+\.?\d*)\s+([\d,]+\.?\d*)"
},
"ccb": {
"name_keywords": ["建设银行", "CCB"],
"account_pattern": r"账号[::]\s*(\d{16,19})",
"balance_pattern": r"余额[::]\s*([\d,]+\.?\d*)",
"date_pattern": r"日期[::]\s*(\d{4}[-/]\d{1,2}[-/]\d{1,2})",
"transaction_start": r"交易明细|流水记录",
"transaction_line": r"(\d{4}[-/]\d{1,2}[-/]\d{1,2})\s+([^\d\n]+?)\s+(-?[\d,]+\.?\d*)"
},
"abc": {
"name_keywords":
如果你觉得这个工具好用,欢迎关注我!