"""
|
||
修正微软拼音输入法无法添加多个格式化自定义短语的问题
|
||
Author: Scruel Tao
|
||
"""
import os
import re
import traceback
from pathlib import Path

# Customize: define custom phrases below, one per line, in the format
# <pinyin position phrase>; the phrase text may safely contain spaces.
# Alternatively, create a phrases.txt in the same directory as this script
# and list custom phrases in it using the same format.
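# The %yyyy%/%MM%/%dd%/%HH%/%mm%/%ss% tokens are the IME's date/time
# placeholders; they are expanded by the input method when the phrase is
# inserted (assumption based on the script's purpose, not verified here).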
PHRASES_TEXT = """
dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
time 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒
""".strip()

LEX_FILE = os.path.join(os.getenv('APPDATA'),
                        r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex')

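# Byte offsets into the .lex file, as used by this script (inferred from the
# reads and writes below, not from an official format specification):
#   0..15  magic "mschxudp" plus version bytes (HEADER_BYTES)
#   20     start offset of the phrase data area (64 + 4 * phrase count)
#   24     total file size in bytes
#   28     number of phrases
#   68     offset table: one 4-byte cumulative end offset per phrase record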
HEADER_LEN = 16 + 4
PHRASE_64PCNT_POS = HEADER_LEN
TOTAL_BYTES_POS = HEADER_LEN + 4
PHRASE_CNT_POS = HEADER_LEN + 8

PADDED_ENCODING = 'utf-16le'
HEADER_BYTES = bytes('mschxudp', encoding='ascii')
HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING)
PHRASE_SEPARATOR_BYTES = b'\x00\x00'
PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40

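# Trailing two bytes of every 16-byte phrase record header. This default is
# used for a freshly created file; main() replaces it with the value found in
# existing records (exact meaning unknown, value inferred from existing files).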
phrase_fixed_last_bytes = b'\xA5\x2C'

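# Low-level helpers: read or overwrite raw bytes of LEX_FILE at an absolute
# file offset.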
def read_bytes(position, length=1):
    with open(LEX_FILE, 'rb+') as file:
        file.seek(position)
        return file.read(length)

def replace_bytes(position, value):
    with open(LEX_FILE, 'rb+') as file:
        file.seek(position)
        data = file.read()
        file.seek(position)
        file.write(value + data[len(value):])

def bytes2int(data):
    return int.from_bytes(data, byteorder='little')

def int2bytes(data, length=1):
    return int.to_bytes(data, length=length, byteorder='little')

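# Encode a string as UTF-16LE code units (two bytes per BMP character), the
# encoding used for pinyin and phrase text inside the .lex file.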
def padded_bytes(s):
    def padded_byte(c):
        b = bytes(c, PADDED_ENCODING)
        return b + b'\x00' if len(b) == 1 else b
    return b''.join([padded_byte(c) for c in s])

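# Build the 16-byte header of a new phrase record: bytes 4-5 hold the offset
# of the phrase text within the record (16-byte header + pinyin + 2-byte
# terminator), byte 6 the candidate position, and the remaining bytes are
# fixed values observed in existing records (assumption, not a documented
# format).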
def get_phrase_header(header_pinyin_len, index):
    return (b'\x10\x00\x10\x00' + int2bytes(header_pinyin_len, 2)
            + int2bytes(index) + b'\x06\x00\x00\x00\x00' + b'\x00\x00'
            + phrase_fixed_last_bytes)

def main():
    global phrase_fixed_last_bytes

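    # Collect phrase definitions from PHRASES_TEXT and, if present, from a
    # phrases.txt next to this script; duplicate lines are removed.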
    current_dir = os.path.dirname(os.path.realpath(__file__))
    phrases_file = Path(current_dir) / 'phrases.txt'
    phrases_text = PHRASES_TEXT
    if phrases_file.exists():
        try:
            phrases_file_text = phrases_file.read_text('utf-8')
        except UnicodeDecodeError:
            phrases_file_text = phrases_file.read_text('gbk')
        phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n')
    phrase_items = list(set([x.strip() for x in phrases_text.split('\n') if x]))

print(f"==================\n"
|
||
f"Author: Scruel Tao\n"
|
||
f"==================\n\n"
|
||
f"正在修正巨硬拼音并添加\n"
|
||
f"预置的日期格式化短语……\n"
|
||
f"\n"
|
||
f"短语数量:{len(phrase_items)}\n"
|
||
)
|
||
|
||
    last_phrase_pos = 0
    phrase_list = []  # (is_new, pinyin, header, phrase)

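    # If no user lex file exists yet, create a minimal empty one: magic and
    # version bytes, data-start/size fields pointing at offset 64, a zero
    # phrase count, and padding (the fixed byte values below are assumed to
    # match an empty lex file produced by the IME).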
    if not os.path.exists(LEX_FILE):
        with open(LEX_FILE, 'wb') as f:
            # Initialize an empty lex file
            f.write(HEADER_BYTES)
            f.write((b'\x40' + b'\x00' * 3) * 3)
            f.write(b'\x00' * 4)
            f.write(b'\x38\xd2\xa3\x65')
            f.write(b'\x00' * 32)
    else:
        phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4))
        phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1)

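        # Each existing record is a 16-byte header followed by the pinyin and
        # the phrase, both UTF-16LE and terminated by 0x00 0x00; the offset
        # table at PHRASE_LEN_FIRST_POS stores cumulative end offsets for all
        # records except the last (layout inferred from the write code below).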
        # Read existing phrases
        for i in range(phrase_cnt):
            if i == phrase_cnt - 1:
                phrase_block_pos = phrase_block_len = -1
            else:
                phrase_block_pos = bytes2int(
                    read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4))
                phrase_block_len = phrase_block_pos - last_phrase_pos
            phrase_block_bytes = read_bytes(
                phrase_block_first_pos + last_phrase_pos, phrase_block_len)
            last_phrase_pos = phrase_block_pos
            pinyin_bytes, phrase_bytes = re.match(
                (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, phrase_block_bytes[16:]).groups()
            phrase_fixed_last_bytes = phrase_block_bytes[14:16]
            # Skip phrases that were marked as deleted
            if phrase_block_bytes[9:10] == b'\x00':
                phrase_list.append((0, pinyin_bytes,
                                    phrase_block_bytes[:16], phrase_bytes))

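    # A newly defined phrase replaces any existing entry with the same pinyin,
    # regardless of its candidate position.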
    # Fix custom phrases
    for item in phrase_items:
        if not item:
            continue
        pinyin, index, phrase = item.split(maxsplit=2)
        pinyin_bytes = padded_bytes(pinyin)
        phrase_bytes = padded_bytes(phrase)
        phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes]
        header = get_phrase_header(
            16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
        phrase_list.append((1, pinyin_bytes, header, phrase_bytes))

    # Necessary fix: keep the entries sorted by pinyin, otherwise the order of
    # phrases in the IME gets messed up.
    phrase_list.sort(key=lambda x: x[1])

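    # Rewrite everything after the fixed header: first the offset table (one
    # 4-byte cumulative end offset per record, no entry for the last record),
    # then the records themselves.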
    # Write phrases
    tolast_phrase_pos = 0
    total_size = PHRASE_LEN_FIRST_POS
    with open(LEX_FILE, 'rb+') as file:
        file.seek(PHRASE_LEN_FIRST_POS)
        file.truncate()
        for _, *items in phrase_list[:-1]:
            phrase_len = sum(map(len, items)) + PHRASE_SEPARATOR_SIZE * 2
            tolast_phrase_pos += phrase_len
            file.write(int2bytes(tolast_phrase_pos, length=4))
            total_size += PHRASE_SEPARATOR_SIZE * 2
        for _, pinyin_bytes, header, phrase_bytes in phrase_list:
            file.write(header)
            data_bytes = PHRASE_SEPARATOR_BYTES.join(
                [pinyin_bytes, phrase_bytes, b''])
            file.write(data_bytes)
            total_size += len(header) + len(data_bytes)

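    # Patch the header fields: data-area start offset (64 + 4 * count), phrase
    # count, and total file size.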
    # Fix file header
    replace_bytes(PHRASE_64PCNT_POS, int2bytes(
        64 + len(phrase_list) * 4, length=4))
    replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4))
    replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4))

if __name__ == "__main__":
    try:
        main()
        print('Done')
    except Exception:
        traceback.print_exc()
    os.system('pause')