The Problem
I'm using Paperless to manage all my documents. Currently I need to click through every email, export the PDFs, and place them in the directory where Paperless can read and process my documents.
Proposed Solution
It would be nice to have a feature that automatically exports all attachments (or just the PDFs) to a specified directory, so that Paperless picks them up automatically without me having to open each email and go through the attachments one by one.
Workaround until it's implemented
For anyone who wants something similar, I wrote a Python script that downloads all PDF attachments and places them in a directory. Add the script to your autostart so it runs every time you start your computer.
import imaplib
import email
import os
import re
import json
def connect_to_email_server(host, username, password):
    """Open an IMAP-over-SSL session, log in, and select the inbox.

    Args:
        host: Hostname of the IMAP server.
        username: Account name used for the login.
        password: Password for the account.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection with ``inbox`` selected.

    Raises:
        imaplib.IMAP4.error: If the login is rejected or the inbox cannot be
            selected.
    """
    print(f"Connecting to {host} for {username}")
    mail = imaplib.IMAP4_SSL(host)
    try:
        mail.login(username, password)
        # select() reports a refusal through its status instead of raising,
        # so check it here rather than failing later with a confusing
        # "illegal in state AUTH" error on the first search.
        status, response = mail.select('inbox')
        if status != 'OK':
            raise imaplib.IMAP4.error(f"Could not select inbox: {response}")
    except Exception:
        # The caller never receives the connection on failure, so close the
        # socket here instead of leaking it.
        mail.shutdown()
        raise
    return mail
def make_safe_filename(filename):
    """Return a version of *filename* that is safe to use as a single path component.

    The name is reduced to ASCII, characters that are reserved in file names
    (including ``:``, which is invalid on Windows) are replaced by ``-``,
    whitespace runs are collapsed to one space, and the result is limited to
    255 characters while keeping the file extension intact.

    Args:
        filename: The attachment name as found in the email.

    Returns:
        The sanitized file name. It is never empty: a name that sanitizes to
        nothing (or to dots only) becomes ``attachment``, and a name that is
        left with only a leading-dot remainder such as ``.pdf`` is prefixed
        with ``attachment``.
    """
    max_length = 255

    # Path separators and reserved characters would either break open() or
    # let the name escape the target folder.
    filename = re.sub(r'[\\/*?:"<>|\r\n]', '-', filename)
    filename = re.sub(r'\s+', ' ', filename)
    filename = filename.encode('ascii', 'ignore').decode('ascii')
    filename = filename.strip()

    if not filename.strip('.'):
        # Empty, "." or ".." are not usable file names.
        filename = 'attachment'
    elif filename.startswith('.'):
        # Happens when a non-ASCII stem was dropped above, leaving e.g. ".pdf".
        filename = 'attachment' + filename

    if len(filename) > max_length:
        stem, ext = os.path.splitext(filename)
        if len(ext) < max_length:
            # Shorten the stem, not the extension, so the file type survives.
            filename = stem[:max_length - len(ext)].rstrip() + ext
        else:
            filename = filename[:max_length].rstrip()
    return filename
def save_state(email_username, latest_email_id):
    """Persist the most recently processed email ID for one account.

    The ID is written as text to ``email_state_<email_username>.txt``
    (relative to the current working directory), replacing any earlier
    contents of that file.
    """
    state_path = f'email_state_{email_username}.txt'
    serialized_id = str(latest_email_id)
    with open(state_path, 'w') as state_handle:
        state_handle.write(serialized_id)
def load_state(email_username):
    """Load the saved state (latest email ID) for one account.

    Args:
        email_username: Account name; the state is read from
            ``email_state_<email_username>.txt`` in the current working
            directory.

    Returns:
        The stored ID as a non-negative int, or 0 when the file does not
        exist or does not contain a valid integer.
    """
    state_file = f'email_state_{email_username}.txt'
    try:
        with open(state_file, 'r') as file:
            content = file.read().strip()
    except FileNotFoundError:
        # First run for this account: nothing has been processed yet.
        return 0
    try:
        # Clamp so a stray negative value can never be used as a start point.
        return max(int(content), 0)
    except ValueError:
        # An empty or truncated file (e.g. after an interrupted write) must
        # not make every following run crash.
        print(f"Ignoring unreadable state in {state_file}; starting from 0")
        return 0
def save_attachment(part, filename, username, save_folder):
    """Write one email attachment into *save_folder* under a unique file name.

    Args:
        part: The message part holding the attachment; its decoded payload
            is what gets written.
        filename: The attachment's name as given in the email. It is
            sanitized with ``make_safe_filename`` before use.
        username: Account name, used only in the log output.
        save_folder: Target directory; created if it does not exist.

    If a file with the sanitized name already exists, ``_1``, ``_2``, ... is
    appended before the extension until an unused name is found. Parts with
    no decodable payload are skipped without creating a file.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        # Nothing to write; bail out before an empty file is created.
        print(f"Skipped attachment without payload: {filename} for {username}")
        return

    safe_filename = make_safe_filename(filename)
    os.makedirs(save_folder, exist_ok=True)

    name, ext = os.path.splitext(safe_filename)
    candidate = safe_filename
    counter = 1
    while True:
        filepath = os.path.join(save_folder, candidate)
        try:
            # Mode 'x' creates the file only if it does not exist yet, so the
            # existence check and the creation cannot race with another writer.
            with open(filepath, 'xb') as fp:
                fp.write(payload)
        except FileExistsError:
            candidate = f"{name}_{counter}{ext}"
            counter += 1
        else:
            break
    # Report the name actually used on disk, including any counter suffix.
    print(f"Saved attachment: {candidate} for {username}")
def _decode_attachment_filename(part):
    """Return the part's filename with RFC 2047 encoded-words decoded, or None."""
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    filename = part.get_filename()
    if not filename:
        return None
    try:
        # Names such as "=?utf-8?B?...?=" may come back undecoded from
        # get_filename(); plain names pass through this unchanged.
        return str(make_header(decode_header(filename)))
    except (LookupError, ValueError, HeaderParseError):
        # Unknown charset or malformed encoding: fall back to the raw name.
        return filename


def fetch_and_save_attachments(mail, username, base_save_folder):
    """Fetch new emails for one account and save their PDF attachments.

    Args:
        mail: A logged-in IMAP connection with a mailbox selected.
        username: Account name; used for the per-account sub-folder, the
            state file, and log output.
        base_save_folder: Directory under which ``<username>/`` is used as
            the target folder for attachments.

    Progress is tracked by message UID rather than by position in the
    mailbox, so deleting or moving already-processed mail no longer causes
    new mail to be skipped. The state is saved after every message, so an
    interrupted run resumes where it stopped.

    NOTE: state files written by the earlier count-based version are read as
    "last processed UID". Since a message's UID is never smaller than its
    position, this cannot skip mail; at worst some already-saved attachments
    are saved once more under a counter suffix.
    NOTE(review): UIDs are only stable while the mailbox's UIDVALIDITY stays
    the same; that value is not tracked here.
    """
    unique_save_folder = os.path.join(base_save_folder, username)
    last_saved_email_id = load_state(username)
    print(f"Checking for new emails for {username}")

    result, data = mail.uid('search', None, 'ALL')
    if result != 'OK':
        print(f"Could not list emails for {username}: {result}")
        return
    raw_ids = data[0] if data and data[0] else b''
    # Sort numerically so the saved state only ever moves forward.
    all_uids = sorted(int(uid) for uid in raw_ids.split())
    new_uids = [uid for uid in all_uids if uid > last_saved_email_id]
    total_emails = len(all_uids)
    new_emails = len(new_uids)
    print(f"Total emails: {total_emails}, New emails since last check: {new_emails}")

    for uid in new_uids:
        # NOTE(review): fetching RFC822 marks the message as read on the
        # server; BODY.PEEK[] would avoid that if it is not wanted.
        result, data = mail.uid('fetch', str(uid), '(RFC822)')
        if result != 'OK':
            # Leave the state untouched so this message is retried next run.
            print(f"Fetch failed for email {uid} of {username}; will retry on next run")
            break
        # A message removed between search and fetch yields no body tuple.
        if data and isinstance(data[0], tuple):
            email_message = email.message_from_bytes(data[0][1])
            for part in email_message.walk():
                if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
                    continue
                filename = _decode_attachment_filename(part)
                # Compare case-insensitively so "SCAN.PDF" is exported too.
                if filename and filename.strip().lower().endswith('.pdf'):
                    save_attachment(part, filename, username, unique_save_folder)
        save_state(username, uid)
def load_or_create_config(config_file):
    """Load the email configuration from a JSON file, creating a template if missing.

    Args:
        config_file: Path of the JSON configuration file.

    Returns:
        The parsed configuration (a dict with ``base_save_folder`` and
        ``email_configs`` when the file follows the generated template).

    Raises:
        SystemExit: After writing a default configuration when *config_file*
            does not exist, so the user can fill in real settings first.
        json.JSONDecodeError: If the existing file is not valid JSON.
    """
    try:
        with open(config_file, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        pass

    default_config = {
        "base_save_folder": r"C:\dev\Mail Attachment Exporter\pdfs",
        "email_configs": [
            {'host': 'imap.example.com', 'username': 'user@example.com', 'password': 'password'}
        ]
    }
    with open(config_file, 'w') as file:
        json.dump(default_config, file, indent=4)
    print("Default configuration file created at:", config_file)
    print("Please modify the configuration file with your settings and rerun the script.")
    # Raise SystemExit directly: the exit() helper is injected by the site
    # module for interactive use and is not guaranteed to exist in scripts.
    raise SystemExit
# Script entry point: process every configured account in turn.
if __name__ == "__main__":
    config_file = 'email_config.json'
    config = load_or_create_config(config_file)
    base_save_folder = config['base_save_folder']
    email_configs = config['email_configs']
    # A distinct loop name keeps the loaded `config` dict from being overwritten.
    for account in email_configs:
        mail = connect_to_email_server(account['host'], account['username'], account['password'])
        try:
            fetch_and_save_attachments(mail, account['username'], base_save_folder)
        finally:
            # Close the IMAP session even when fetching raised.
            mail.logout()
            print(f"Logged out from {account['username']}'s email server.")