-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollect_old_posts.py
More file actions
97 lines (78 loc) · 3.11 KB
/
collect_old_posts.py
File metadata and controls
97 lines (78 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import asyncio
import random
import config
import csv
import io
from telethon import TelegramClient, errors
from telethon.tl.types import InputMessagesFilterEmpty, MessageService
def contains_ignored_tags(text: str) -> bool:
    """Tell whether *text* contains any entry of ``config.IGNORE_TAGS``.

    The text is lower-cased before the substring search; the configured
    tags are compared exactly as they are stored in ``config``.

    Returns:
        True as soon as one tag is found inside the lower-cased text,
        False when none of them occurs.
    """
    haystack = text.lower()
    for ignored in config.IGNORE_TAGS:
        if ignored in haystack:
            return True
    return False
async def fetch_messages(client, channel):
    """Gather the usable messages of *channel* into a list.

    Iterates ``client.iter_messages`` for the channel with
    ``offset_date=config.MONTH``, ``reverse=True``, no limit and the
    ``InputMessagesFilterEmpty`` filter.
    NOTE(review): with ``reverse=True`` Telethon is expected to yield the
    messages newer than ``offset_date``, oldest first - confirm against
    the Telethon documentation and the value stored in ``config.MONTH``.

    A message is kept unless it is a ``MessageService`` instance or it has
    neither non-blank text nor media.

    Returns:
        The kept message objects, in the order the iterator produced them.
    """

    def _is_wanted(candidate):
        # Service entries are never collected.
        if isinstance(candidate, MessageService):
            return False
        body = candidate.message or ""
        # Keep anything that carries visible text or an attachment.
        return bool(body.strip()) or bool(candidate.media)

    history = client.iter_messages(
        channel,
        offset_date=config.MONTH,
        reverse=True,
        limit=None,
        filter=InputMessagesFilterEmpty,
    )
    return [item async for item in history if _is_wanted(item)]
def _build_csv_attachment(rows):
    """Serialize *rows* into a named in-memory UTF-8 CSV file.

    Args:
        rows: Iterable of dicts with the keys ``id``, ``channel``, ``date``,
            ``url`` and ``text``.

    Returns:
        An ``io.BytesIO`` holding the encoded CSV (header row included),
        with a ``name`` attribute set so the upload carries a file name.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from datetime import datetime

    text_buffer = io.StringIO()
    writer = csv.DictWriter(
        text_buffer,
        fieldnames=["id", "channel", "date", "url", "text"],
    )
    writer.writeheader()
    writer.writerows(rows)

    attachment = io.BytesIO(text_buffer.getvalue().encode("utf-8"))
    # The name is derived from the collection time rather than from a
    # leftover loop variable, which was undefined when nothing was fetched.
    # Dashes/underscore are used because spaces and colons are not valid in
    # file names on every platform.
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    attachment.name = f"vacancies_{stamp}.csv"
    return attachment


async def main():
    """Collect messages from the configured channels and send them as a CSV.

    Steps:
      1. Open a ``TelegramClient`` session using the values from ``config``.
      2. Visit ``config.CHANNELS`` in random order, fetch each channel's
         messages via ``fetch_messages`` and keep those whose text does not
         contain an ignored tag. A failure on one channel is logged and the
         remaining channels are still processed.
      3. Build a CSV from the collected rows and send it to
         ``config.FORWARD_TO_CHAT``. If Telegram answers with a flood wait,
         sleep for the requested time and retry the upload once.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    async with TelegramClient(
        session=config.ACCOUNT_NAME,
        api_id=config.API_ID,
        api_hash=config.API_HASH,
        system_version=config.SYSTEM_VERSION,
    ) as client:
        all_messages = []

        # Work on a copy so the configured channel list itself is not reordered.
        shuffled_channels = config.CHANNELS[:]
        random.shuffle(shuffled_channels)

        for channel in shuffled_channels:
            clean_channel = channel.strip("@")
            try:
                msgs = await fetch_messages(client, channel)
                print(f"[INFO] Получено сообщений из {channel}: {len(msgs)}")
                for msg in msgs:
                    text = msg.message or ""
                    if contains_ignored_tags(text):
                        continue
                    all_messages.append({
                        "id": msg.id,
                        "channel": clean_channel,
                        "date": msg.date.strftime("%Y-%m-%d %H:%M:%S"),
                        "url": f"https://t.me/{clean_channel}/{msg.id}",
                        # Flatten line breaks so each message stays on one CSV row.
                        "text": text.replace("\n", " ").strip(),
                    })
            except Exception as e:
                # Deliberate best-effort: one broken channel must not abort the run.
                print(f"[ERROR][{channel}] {e}")

        print(f"[INFO] Всего сообщений к сохранению: {len(all_messages)}")

        csv_bytes = _build_csv_attachment(all_messages)

        max_attempts = 2
        for attempt in range(1, max_attempts + 1):
            # Rewind before every attempt: a previous upload may have
            # consumed part of the buffer.
            csv_bytes.seek(0)
            try:
                await client.send_file(
                    config.FORWARD_TO_CHAT,
                    file=csv_bytes,
                    caption=f"Собрано {len(all_messages)} сообщений",
                )
            except errors.FloodWaitError as e:
                if attempt == max_attempts:
                    print(f"[ERROR] Ошибка при отправке файла: {e}")
                    break
                print(f"[FLOOD_WAIT] Ожидание {e.seconds} секунд...")
                # Waiting is only useful because the loop retries afterwards.
                await asyncio.sleep(e.seconds)
            except Exception as e:
                print(f"[ERROR] Ошибка при отправке файла: {e}")
                break
            else:
                print("[INFO] CSV успешно отправлен.")
                break

    print("Работа завершена.")
if __name__ == "__main__":
    # Script entry point: run the async workflow on a fresh event loop
    # only when the file is executed directly, not when it is imported.
    asyncio.run(main())