This commit is contained in:
2026-03-04 01:21:06 +03:00
parent c7ad35d343
commit 9b004c3a86
7 changed files with 598 additions and 49 deletions
+102
View File
@@ -0,0 +1,102 @@
import argparse
import asyncio
import re
from docx import Document
import asyncpg
def parse_residents(docx_path: str) -> list[dict]:
    """Collect unique (room number, resident name) pairs from a .docx file.

    Every table in the document is scanned row by row. Rows with fewer
    than three cells are ignored. The first cell is searched for a
    standalone 3-4 digit number; when one is found it becomes the current
    room and stays in effect for the following rows of the same table
    until another number is found. The third cell is treated as the
    resident name.

    A row contributes nothing when its name cell is empty, when no room
    has been seen yet in the current table (a room that parses to 0 is
    treated the same way), or when the name cell contains one of the
    skip keywords. A trailing whitespace-separated "С"/"C" marker is
    removed from the name, and names of two characters or fewer are
    discarded.

    Args:
        docx_path: Path to the .docx file to read.

    Returns:
        A list of ``{"room_number": int, "real_name": str}`` dicts in
        document order, without duplicate (room, name) pairs.
    """
    # Loop-invariant helpers, built once instead of once per row.
    room_pattern = re.compile(r'\b(\d{3,4})\b')
    trailing_mark = re.compile(r'\s+[СC]$')
    header_words = ('Дата', 'Фамилия', 'Осталось', 'Совершеннолетних')

    residents: list[dict] = []
    known_pairs: set = set()

    for table in Document(docx_path).tables:
        # The room number does not carry over from one table to the next.
        room = None
        for row in table.rows:
            row_cells = row.cells
            if len(row_cells) < 3:
                continue

            found = room_pattern.search(row_cells[0].text.strip())
            if found is not None:
                room = int(found.group(1))

            raw_name = row_cells[2].text.strip()
            if not raw_name or not room:
                continue
            if any(word in raw_name for word in header_words):
                continue

            name = trailing_mark.sub('', raw_name).strip()
            if len(name) <= 2:
                continue

            pair = (room, name)
            if pair in known_pairs:
                continue
            known_pairs.add(pair)
            residents.append({"room_number": room, "real_name": name})

    return residents
async def insert_data(database_url: str, residents: list[dict]) -> None:
    """Insert rooms and residents into PostgreSQL inside one transaction.

    The floor of each room is derived as ``room_number // 100`` and looked
    up in the ``floors`` table. Rooms that are not yet present are
    inserted (existing room numbers are left untouched via
    ``ON CONFLICT (number) DO NOTHING``), then one ``residents`` row is
    inserted per entry, pointing at the id of its room.

    Args:
        database_url: Connection URL handed to ``asyncpg.create_pool``.
        residents: Dicts with ``"room_number"`` (int) and ``"real_name"``
            (str) keys. When the list is empty the function returns
            immediately without connecting to the database.

    Raises:
        RuntimeError: If the ``floors`` table has no rows.
        ValueError: If a room's derived floor number is missing from
            ``floors``.
    """
    # Nothing to import: do not open a connection just to run no-op queries.
    if not residents:
        return

    # Sorted so rooms are validated and inserted in a deterministic order.
    room_numbers = sorted({r["room_number"] for r in residents})

    # Only a single connection is ever used, so cap the pool at one;
    # asyncpg's defaults would open ten connections up front.
    async with asyncpg.create_pool(database_url, min_size=1, max_size=1) as pool:
        async with pool.acquire() as conn:
            # Everything below runs in a single transaction; an exception
            # leaving this block makes asyncpg roll the transaction back.
            async with conn.transaction():
                floor_rows = await conn.fetch("SELECT id, number FROM floors;")
                floor_map = {row['number']: row['id'] for row in floor_rows}
                if not floor_map:
                    raise RuntimeError("Таблица floors пуста, невозможно привязать комнаты.")

                room_insert_params = []
                for room_num in room_numbers:
                    floor_num = room_num // 100
                    if floor_num not in floor_map:
                        raise ValueError(f"Для комнаты {room_num} не найден этаж {floor_num} в БД.")
                    room_insert_params.append((room_num, floor_map[floor_num]))

                await conn.executemany(
                    """
                    INSERT INTO rooms (number, on_floor)
                    VALUES ($1, $2)
                    ON CONFLICT (number) DO NOTHING;
                    """,
                    room_insert_params,
                )

                # Re-read the ids so that both freshly inserted and
                # pre-existing rooms can be referenced.
                room_rows = await conn.fetch(
                    """
                    SELECT id, number
                    FROM rooms
                    WHERE number = ANY($1::int[]);
                    """,
                    room_numbers,
                )
                room_map = {row['number']: row['id'] for row in room_rows}

                resident_params = [
                    (r["real_name"], room_map[r["room_number"]])
                    for r in residents
                ]

                # NOTE(review): this insert has no conflict handling, so
                # running the import twice submits the same rows again;
                # whether that duplicates or fails depends on constraints
                # of the residents table, which are not visible here.
                await conn.executemany(
                    """
                    INSERT INTO residents (real_name, room, created_at, updated_at)
                    VALUES ($1, $2, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP);
                    """,
                    resident_params,
                )
async def main() -> None:
    """Parse command-line options, read the .docx file and load it into the DB.

    Both ``--db-url`` and ``--file`` are required; argparse exits the
    process when either is missing. The file is parsed with
    ``parse_residents`` and the result is passed to ``insert_data``.
    """
    cli = argparse.ArgumentParser(
        description="Импорт жильцов из docx в PostgreSQL",
    )
    cli.add_argument(
        "--db-url",
        required=True,
        help="URL базы данных (напр. postgresql://user:pass@localhost/db)",
    )
    cli.add_argument(
        "--file",
        required=True,
        help="Путь к docx файлу",
    )
    options = cli.parse_args()

    await insert_data(options.db_url, parse_residents(options.file))
# Run the import only when this file is executed as a script, not when it
# is imported as a module; asyncio.run drives the async entry point.
if __name__ == "__main__":
    asyncio.run(main())