Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .idea/.gitignore → .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ details.py
Collection
__pycache__
*.session
*.session-journal
*.session-journal
*.txt
*.csv
10 changes: 4 additions & 6 deletions assessment.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def extract_sentences(username, input_csv, output_pdf, target_phrase_list1, targ
doc.build(story)
print(f"Key phrase extraction report saved to {output_pdf_path}")

if __name__ == "__main__":

def main():
# Define target phrase lists
target_phrase_list1 = ["my", "buy", "buying", "get", "getting", "got", "have", "acquire", "acquiring", "obtain", "obtaining", "procure", "procuring", "wanting", "want to", "want a" "going to", "own", "owning", "license", "a", "with", "certified", "need", "stolen", "steal a", "3d print", "3d-print", "3d printed", "3d-printed", "borrow", "take",]
target_phrase_list2 = ["gun", "rifle", "pistol", "knife", "shotgun", "revolver", "firearm", "firearms", "SMG", "AR", "sawnoff", "sawn off", "sawn-off", "machine gun", "doublebarrel", "doublebarreled", "double-barrel", "double-barreled", "bolt-action", "bolt-action", "lever-action", "lever action", "pump-action", "semi-automatic", "semiautomatic", "fully automatic", "ar-15", "ar15", "AK-47", "M4", "M16", "remington", "glock", "sig", "springfield", "ruger", "Smith & Wesson", "S&W", "M&P", "Colt", "Winchester", "benelli", "M&P15", "kel-tec", "KSG", "590", "870", "LE6920", "AR-556", "G19", "85", "taurus", "629", ".85", ".45", ".22", "22", "9mm", "9 mm", ".30", "beretta", ".50", "50cal", "50 cal", "bushmaster", "M1911", "12 gauge", "12gauge", "12ga","geissele", "chamber a round" , "ammo", "ammunition", "caliber", "gauge", "magazine", "buck shot", "buckshot", "armor-piercing", "hollow point", "hollow points","birdshot", "bird shot", "gun range", "rifle club", "shooting pracitice", "shooting range", "firearms training ", "hunting", "duck shooting", "target shooting", "target pracitice", "scope", "silencer", "suppressor", "compensator","stock", "barrel", "muzzle", "bipod", "firing pin", "optics", "crossbow", "compound bow", "pipe bomb", "pipebomb", "pipe-bomb", "pipebombs", "pipe-bombs", "grenade", "grenades", "IED", "improvised explosive device", "ball bearings", "molotov", "nitrate", "TNT", "landmine", "firebomb", "tannerite", "semtex", "fertilizer", "detonater", "detornaters", "nitroglycerin", "ammonium nitrate", "propellant", "thermalite", "thermite", "blasting cap", "det cord", "detcord", "boom stick", "fire bomb", "explosives", "kevlar", "body armour", "stab-proof vest", "stabproof vest",]
Expand All @@ -104,8 +105,5 @@ def extract_sentences(username, input_csv, output_pdf, target_phrase_list1, targ
# Run the extraction and PDF generation
extract_sentences(target_username, "input.csv", target_username + "_threat_assessment.pdf", target_phrase_list1, target_phrase_list2, target_phrase_list3)

# Ask if the user wants to return to the launcher
launcher = input('Do you want to return to the launcher? (y/n)')
if launcher == 'y':
print('Restarting...')
exec(open("launcher.py").read())
if __name__ == "__main__":
main()
9 changes: 5 additions & 4 deletions channellist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


async def scrape_forwards(channel_name):
l = []
channel_info = []
source_urls = []
count = 0

Expand All @@ -29,7 +29,7 @@ async def scrape_forwards(channel_name):
ent = await client.get_entity(id)
target_channel_entity = await client.get_entity(message.to_id.channel_id)
target_channel_title = target_channel_entity.title
l.append([ent.title, target_channel_title])
channel_info.append([ent.title, target_channel_title])
source_url = f"https://t.me/{ent.username}"
source_urls.append(source_url)
count += 1
Expand All @@ -42,7 +42,7 @@ async def scrape_forwards(channel_name):
except Exception as e:
print(f"{Fore.RED}Skipping forward: Private/Inaccessible{Style.RESET_ALL}")

df = pd.DataFrame(l, columns=['From', 'To'])
df = pd.DataFrame(channel_info, columns=['From', 'To'])
source_df = pd.DataFrame(source_urls, columns=['SourceURL'])

os.makedirs('Adjacency List', exist_ok=True)
Expand Down Expand Up @@ -75,9 +75,10 @@ async def main():
await scrape_forwards(channel)
print("CSV files created for", channel)
print()
print('Forwards scraped successfully.')


if __name__ == '__main__':
asyncio.run(main())

print('Forwards scraped successfully.')

127 changes: 62 additions & 65 deletions channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,76 +12,73 @@


async def main():
client = TelegramClient(phone, api_id, api_hash)
await client.start()
while True:
client = TelegramClient(phone, api_id, api_hash)
await client.start()

print(' ')
print('This tool will scrape a Telegram channel for all forwarded messages and their original source.')
print(' ')
print(' ')
print('This tool will scrape a Telegram channel for all forwarded messages and their original source.')
print(' ')

while True:
try:
channel_name = input("Please enter a Telegram channel name:\n")
print(f'You entered "{channel_name}"')
answer = input('Is this correct? (y/n)')
if answer == 'y':
print('Scraping forwards from', channel_name, '...')
break
except Exception:
continue

l = []
source_urls = []
count = 0

async for message in client.iter_messages(channel_name):
if message.forward is not None:
while True:
try:
id = message.forward.original_fwd.from_id
if id is not None:
try:
ent = await client.get_entity(id)
target_channel_entity = await client.get_entity(message.to_id.channel_id)
target_channel_title = target_channel_entity.title
l.append([ent.title, target_channel_title])
source_url = f"https://t.me/{ent.username}"
source_urls.append(source_url)
count += 1
print(
f"From {Fore.CYAN + ent.title + Style.RESET_ALL} to {Fore.YELLOW + target_channel_title + Style.RESET_ALL}")
except ValueError as e:
print("Skipping forward:", e)
except Exception as e:
print(f"{Fore.RED}Skipping forward: Private/Inaccessible{Style.RESET_ALL}")

# Create the folders if they don't exist
adjacency_folder = 'Adjacency List'
urls_folder = 'Source URLs'
os.makedirs(adjacency_folder, exist_ok=True)
os.makedirs(urls_folder, exist_ok=True)

df = pd.DataFrame(l, columns=['From', 'To'])
df.to_csv(os.path.join(adjacency_folder, f'{channel_name}.csv'), header=False, index=False)

source_df = pd.DataFrame(source_urls, columns=['SourceURL'])
source_df.to_csv(os.path.join(urls_folder, f'{channel_name}SourceURLs.csv'), header=False, index=False)

await client.disconnect()

channel_name = input("Please enter a Telegram channel name:\n")
print(f'You entered "{channel_name}"')
answer = input('Is this correct? (y/n)')
if answer.lower() == 'y':
print('Scraping forwards from', channel_name, '...')
break
except Exception:
continue

forwarded_messages_info = []
source_urls = []
count = 0

async for message in client.iter_messages(channel_name):
if message.forward is not None:
try:
id = message.forward.original_fwd.from_id
if id is not None:
try:
ent = await client.get_entity(id)
target_channel_entity = await client.get_entity(message.to_id.channel_id)
target_channel_title = target_channel_entity.title
forwarded_messages_info.append([ent.title, target_channel_title])
source_url = f"https://t.me/{ent.username}"
source_urls.append(source_url)
count += 1
print(
f"From {Fore.CYAN + ent.title + Style.RESET_ALL} to {Fore.YELLOW + target_channel_title + Style.RESET_ALL}")
except ValueError as e:
print("Skipping forward:", e)
except Exception as e:
print(f"{Fore.RED}Skipping forward: Private/Inaccessible{Style.RESET_ALL}")

# Create the folders if they don't exist
adjacency_folder = 'Adjacency List'
urls_folder = 'Source URLs'
os.makedirs(adjacency_folder, exist_ok=True)
os.makedirs(urls_folder, exist_ok=True)

df = pd.DataFrame(forwarded_messages_info, columns=['From', 'To'])
df.to_csv(os.path.join(adjacency_folder, f'{channel_name}.csv'), header=False, index=False)

source_df = pd.DataFrame(source_urls, columns=['SourceURL'])
source_df.to_csv(os.path.join(urls_folder, f'{channel_name}SourceURLs.csv'), header=False, index=False)

await client.disconnect()

print('Forwards scraped successfully.')

again = input('Do you want to scrape more channels? (y/n)')
if again.lower() == 'y':
print('Restarting...')

else:
break

if __name__ == '__main__':
asyncio.run(main())

print('Forwards scraped successfully.')

again = input('Do you want to scrape more channels? (y/n)')
if again == 'y':
print('Restarting...')
exec(open("channels.py").read())
else:
pass

launcher = input('Do you want to return to the launcher? (y/n)')
if launcher == 'y':
print('Restarting...')
exec(open("launcher.py").read())
3 changes: 1 addition & 2 deletions channelscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
api_hash = ds.apiHash
phone = ds.number


async def scrape_channel_content(channel_name):
async with TelegramClient(phone, api_id, api_hash) as client:
try:
Expand Down Expand Up @@ -60,7 +59,7 @@ async def main():
f"{Fore.CYAN}Please enter a target Telegram channel (e.g., https://t.me/{Fore.LIGHTYELLOW_EX}your_channel{Style.RESET_ALL}):\n")
print(f'You entered "{Fore.LIGHTYELLOW_EX}{channel_name}{Style.RESET_ALL}"')
answer = input('Is this correct? (y/n)')
if answer != 'y':
if answer.lower() != 'y':
return

output_directory = f"Collection/{channel_name}"
Expand Down
Loading