# my-script-tools/python/srf.py

"""
Fix the problem that the Microsoft Pinyin IME cannot add multiple formatted custom phrases.
Author: Scruel Tao
"""
import os
import re
import traceback
from pathlib import Path

# Customization: define custom phrases below, one per line, in the format
# <pinyin position phrase>; a phrase may safely contain spaces.
# Alternatively, create a phrases.txt in the same directory as this script and
# list custom phrases there in the same format.
PHRASES_TEXT = """
dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
time 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
date 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
""".strip()
LEX_FILE = os.path.join(os.getenv('APPDATA'),
                        r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex')
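
# Header field offsets patched at the end of main() (as this script interprets
# the .lex format): PHRASE_64PCNT_POS holds 64 + 4 * phrase_count,
# TOTAL_BYTES_POS the total file size, PHRASE_CNT_POS the number of stored
# phrases, and PHRASE_LEN_FIRST_POS is where the table of cumulative
# phrase-block offsets begins.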
HEADER_LEN = 16 + 4
PHRASE_64PCNT_POS = HEADER_LEN
TOTAL_BYTES_POS = HEADER_LEN + 4
PHRASE_CNT_POS = HEADER_LEN + 8
PADDED_ENCODING = 'utf-16le'
HEADER_BYTES = bytes('mschxudp', encoding='ascii')
HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING)
PHRASE_SEPARATOR_BYTES = b'\x00\x00'
PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40
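# Last two bytes of each phrase header; the default below is replaced by
# whatever value is found in an existing lex file (see main()).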
phrase_fixed_last_bytes = b'\xA5\x2C'


def read_bytes(position, length=1):
    with open(LEX_FILE, 'rb+') as file:
        file.seek(position)
        return file.read(length)
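

# Overwrite bytes at `position` in place; everything after the written value
# is preserved.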
def replace_bytes(position, value):
    with open(LEX_FILE, 'rb+') as file:
        file.seek(position)
        data = file.read()
        file.seek(position)
        file.write(value + data[len(value):])


def bytes2int(data):
    return int.from_bytes(data, byteorder='little')


def int2bytes(data, length=1):
    return int.to_bytes(data, length=length, byteorder='little')


def padded_bytes(s):
    def padded_byte(c):
        b = bytes(c, PADDED_ENCODING)
        return b + b'\x00' if len(b) == 1 else b
    return b''.join([padded_byte(c) for c in s])
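

# Build the 16-byte header that precedes each stored phrase block. The fields
# (as this script interprets them) are: fixed bytes, the offset of the phrase
# text within the block (header_pinyin_len), the candidate position index,
# more fixed bytes, and the two trailing bytes kept in phrase_fixed_last_bytes.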
def get_phrase_header(header_pinyin_len, index):
    return (b'\x10\x00\x10\x00' + int2bytes(header_pinyin_len, 2)
            + int2bytes(index) + b'\x06\x00\x00\x00\x00' + b'\x00\x00'
            + phrase_fixed_last_bytes)


def main():
    global phrase_fixed_last_bytes
    current_dir = os.path.dirname(os.path.realpath(__file__))
    phrases_file = Path(current_dir) / 'phrases.txt'
    phrases_text = PHRASES_TEXT
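    # Merge phrases from an optional phrases.txt next to the script, trying
    # UTF-8 first and falling back to GBK.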
    if phrases_file.exists():
        try:
            phrases_file_text = phrases_file.read_text('utf-8')
        except UnicodeDecodeError:
            phrases_file_text = phrases_file.read_text('gbk')
        phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n')
    phrase_items = list(set([x.strip() for x in phrases_text.split('\n') if x]))
print(f"==================\n"
f"Author: Scruel Tao\n"
f"==================\n\n"
f"正在修正巨硬拼音并添加\n"
f"预置的日期格式化短语……\n"
f"\n"
f"短语数量:{len(phrase_items)}\n"
)
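    # Collect phrase blocks: either initialize a fresh lex file, or parse the
    # entries already stored in the existing one.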
    last_phrase_pos = 0
    phrase_list = []  # (is_new, pinyin_bytes, header_bytes, phrase_bytes)
    if not os.path.exists(LEX_FILE):
        with open(LEX_FILE, 'wb') as f:
            # Initialize an empty lex file with the expected header
            f.write(HEADER_BYTES)
            f.write((b'\x40' + b'\x00' * 3) * 3)
            f.write(b'\x00' * 4)
            f.write(b'\x38\xd2\xa3\x65')
            f.write(b'\x00' * 32)
    else:
        phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4))
        phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1)
        # Read existing phrases
        for i in range(phrase_cnt):
            if i == phrase_cnt - 1:
                phrase_block_pos = phrase_block_len = -1
            else:
                phrase_block_pos = bytes2int(
                    read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4))
                phrase_block_len = phrase_block_pos - last_phrase_pos
            phrase_block_bytes = read_bytes(
                phrase_block_first_pos + last_phrase_pos, phrase_block_len)
            last_phrase_pos = phrase_block_pos
            pinyin_bytes, phrase_bytes = re.match(
                (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, phrase_block_bytes[16:]).groups()
            phrase_fixed_last_bytes = phrase_block_bytes[14:16]
            # Carry over only phrases that are not flagged as deleted
            if phrase_block_bytes[9:10] == b'\x00':
                phrase_list.append((0, pinyin_bytes,
                                    phrase_block_bytes[:16], phrase_bytes))
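    # Merge the configured phrases: for each entry, drop any previously stored
    # phrase that has the same pinyin, then append the new one.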
    # Fix custom phrases
    for item in phrase_items:
        if not item:
            continue
        pinyin, index, phrase = item.split(maxsplit=2)
        pinyin_bytes = padded_bytes(pinyin)
        phrase_bytes = padded_bytes(phrase)
        phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes]
        header = get_phrase_header(
            16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
        phrase_list.append((1, pinyin_bytes, header, phrase_bytes))
    # Necessary fix, otherwise the order of phrases will be messed up.
    phrase_list.sort(key=lambda x: x[1])
    # Write phrases
    tolast_phrase_pos = 0
    total_size = PHRASE_LEN_FIRST_POS
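    # Everything from PHRASE_LEN_FIRST_POS onward is rebuilt: first the table
    # of cumulative block offsets (one 4-byte entry per phrase except the
    # last), then each block as <16-byte header><pinyin>\x00\x00<phrase>\x00\x00.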
    with open(LEX_FILE, 'rb+') as file:
        file.seek(PHRASE_LEN_FIRST_POS)
        file.truncate()
        for _, *items in phrase_list[:-1]:
            phrase_len = sum(map(len, items)) + PHRASE_SEPARATOR_SIZE * 2
            tolast_phrase_pos += phrase_len
            file.write(int2bytes(tolast_phrase_pos, length=4))
            total_size += PHRASE_SEPARATOR_SIZE * 2
        for _, pinyin_bytes, header, phrase_bytes in phrase_list:
            file.write(header)
            data_bytes = PHRASE_SEPARATOR_BYTES.join(
                [pinyin_bytes, phrase_bytes, b''])
            file.write(data_bytes)
            total_size += len(header) + len(data_bytes)
    # Fix file header
    replace_bytes(PHRASE_64PCNT_POS, int2bytes(
        64 + len(phrase_list) * 4, length=4))
    replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4))
    replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4))


if __name__ == "__main__":
    try:
        main()
        print('Done')
    except Exception:
        traceback.print_exc()
    os.system('pause')