Auto-export of attachments: PDFs (for Paperless-ngx)

The Problem

I'm using Paperless to manage all my documents. Currently I need to click through every email to export the PDFs and place them into the directory where Paperless can read and process my documents.

Proposed Solution

It would be nice if there were a feature to auto-export all attachments (or only PDFs) to a specified directory, so that Paperless automatically picks up all attachments without me opening each mail and going through them one by one.

Workaround until it's implemented

For everyone who wants something similar, I made a Python script that downloads all PDF attachments and places them in a directory. Add the script to your autostart so it runs every time you start your computer.

import imaplib
import email
import os
import re
import json

def connect_to_email_server(host, username, password):
    """Open an IMAP-over-SSL session, log in, and select the inbox.

    Args:
        host: Hostname of the IMAP server.
        username: Account name used to log in.
        password: Password for the account.

    Returns:
        The ``imaplib.IMAP4_SSL`` connection with 'inbox' selected.

    Raises:
        imaplib.IMAP4.error: If the login is rejected or the inbox cannot
            be selected.
        OSError: If the connection itself cannot be established.
    """
    print(f"Connecting to {host} for {username}")
    mail = imaplib.IMAP4_SSL(host)
    try:
        mail.login(username, password)
        # select() reports a refusal through its status instead of raising;
        # surface it here rather than failing later on the first search.
        status, _ = mail.select('inbox')
        if status != 'OK':
            raise imaplib.IMAP4.error(f"Could not select inbox for {username}")
    except Exception:
        # Close the socket so a failed login does not leak the connection.
        mail.shutdown()
        raise
    return mail


def make_safe_filename(filename, max_length=255):
    """Return a version of *filename* that is safe to use as a file name.

    Characters that are invalid in file names (including ``:``, which
    Windows rejects) are replaced with ``-``, whitespace runs are collapsed
    to one space, and non-ASCII characters are dropped.

    Args:
        filename: The raw attachment name.
        max_length: Maximum length of the result, extension included.

    Returns:
        The sanitized name, or ``'attachment'`` if nothing usable remains.
    """
    filename = re.sub(r'[\\/*?:"<>|\r\n]', '-', filename)
    filename = re.sub(r'\s+', ' ', filename)
    filename = filename.encode('ascii', 'ignore').decode('ascii')
    # Strip before measuring so padding does not count toward the limit.
    filename = filename.strip()

    if len(filename) > max_length:
        stem, ext = os.path.splitext(filename)
        if len(ext) < max_length:
            # Shorten the stem only, so the extension (e.g. ".pdf") survives.
            filename = stem[:max_length - len(ext)].rstrip() + ext
        else:
            filename = filename[:max_length].rstrip()

    return filename or 'attachment'


def save_state(email_username, latest_email_id):
    """Persist the last processed email ID for one account.

    The value is written as text to ``email_state_<username>.txt`` in the
    current working directory, replacing any previous content.
    """
    with open(f'email_state_{email_username}.txt', 'w') as handle:
        marker = str(latest_email_id)
        handle.write(marker)


def load_state(email_username):
    """Load the saved state (latest email ID) for one account.

    Reads ``email_state_<username>.txt`` from the current working directory.

    Returns:
        The stored non-negative integer, or 0 when the file is missing,
        empty, or does not contain a valid integer. Returning 0 means the
        caller starts over from the first email.
    """
    state_file = f'email_state_{email_username}.txt'
    try:
        with open(state_file, 'r') as file:
            # Clamp: a negative value would be used as a start index.
            return max(int(file.read().strip()), 0)
    except FileNotFoundError:
        return 0
    except ValueError:
        # An empty or truncated file (e.g. an interrupted write) must not
        # stop the script from starting.
        print(f"Ignoring unreadable state file {state_file}; starting from 0")
        return 0


def save_attachment(part, filename, username, save_folder):
    """Write one email attachment into *save_folder* without overwriting.

    Args:
        part: The email message part holding the attachment.
        filename: The attachment's name as given in the email.
        username: Account name, used only in log output.
        save_folder: Target directory; created if it does not exist.

    If a file with the sanitized name already exists, ``_1``, ``_2``, ...
    is appended to the stem until an unused name is found.
    """
    # Fallback name in case sanitizing leaves nothing usable.
    safe_filename = make_safe_filename(filename) or 'attachment'

    payload = part.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for parts without a body of
        # their own; there is nothing to write.
        print(f"Skipped attachment without content: {safe_filename} for {username}")
        return

    os.makedirs(save_folder, exist_ok=True)

    name, ext = os.path.splitext(safe_filename)
    candidate = safe_filename
    counter = 1
    while True:
        filepath = os.path.join(save_folder, candidate)
        try:
            # 'x' makes creation exclusive, so a file that appears between
            # the check and the write is never overwritten.
            with open(filepath, 'xb') as fp:
                fp.write(payload)
            break
        except FileExistsError:
            candidate = f"{name}_{counter}{ext}"
            counter += 1

    # Report the name actually used on disk, including any numeric suffix.
    print(f"Saved attachment: {candidate} for {username}")


def _decode_attachment_filename(filename):
    """Return *filename* with RFC 2047 encoded-words (``=?utf-8?...?=``) decoded."""
    # Function-scope imports keep this block independent of the file header.
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    if '=?' not in filename:
        return filename
    try:
        return str(make_header(decode_header(filename)))
    except (HeaderParseError, LookupError, UnicodeError):
        # Malformed encoding or unknown charset: keep the raw name.
        return filename


def fetch_and_save_attachments(mail, username, base_save_folder):
    """Fetch emails not yet processed and save their PDF attachments.

    Args:
        mail: A logged-in IMAP connection with a mailbox selected.
        username: Account name; used for the state file, the per-account
            sub-folder, and log output.
        base_save_folder: Directory under which ``<username>/`` is created.

    Progress is stored after every email, so an interrupted run resumes
    where it stopped.

    NOTE(review): the state is a position in the ``SEARCH ALL`` result, not
    an IMAP UID, so emails deleted on the server shift the positions.
    Switching to UIDs would fix this but changes the meaning of existing
    state files.
    """
    unique_save_folder = os.path.join(base_save_folder, username)
    last_saved_email_id = load_state(username)
    print(f"Checking for new emails for {username}")

    result, data = mail.search(None, 'ALL')
    if result != 'OK' or not data or data[0] is None:
        print(f"Could not list emails for {username} (server answered {result})")
        return
    mail_ids = data[0].split()
    total_emails = len(mail_ids)

    if last_saved_email_id > total_emails:
        # The mailbox shrank below the stored position. Re-anchor the state
        # so mail arriving from now on is picked up.
        print(f"Saved position {last_saved_email_id} exceeds mailbox size; resetting to {total_emails}")
        last_saved_email_id = total_emails
        save_state(username, total_emails)

    new_emails = total_emails - last_saved_email_id
    print(f"Total emails: {total_emails}, New emails since last check: {new_emails}")

    for i in range(last_saved_email_id, total_emails):
        email_id = mail_ids[i]
        status, data = mail.fetch(email_id, '(RFC822)')

        # A message removed since the search yields no (header, body) tuple.
        if status == 'OK' and data and isinstance(data[0], tuple):
            email_message = email.message_from_bytes(data[0][1])

            for part in email_message.walk():
                if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
                    continue

                filename = part.get_filename()
                if not filename:
                    continue

                filename = _decode_attachment_filename(filename)
                # Case-insensitive so "SCAN.PDF" is exported too.
                if filename.lower().endswith('.pdf'):
                    save_attachment(part, filename, username, unique_save_folder)
        else:
            print(f"Skipping email {email_id.decode()}: could not be fetched")

        save_state(username, i + 1)


def load_or_create_config(config_file):
    """Load the JSON configuration, creating a template if it is missing.

    Args:
        config_file: Path of the JSON configuration file.

    Returns:
        The parsed configuration with the keys ``base_save_folder`` and
        ``email_configs`` (a list of host/username/password entries).

    Raises:
        SystemExit: After writing a default template when *config_file*
            does not exist, so the user can fill in real settings first.
        json.JSONDecodeError: If the file exists but is not valid JSON.

    NOTE: passwords are stored in plain text in this file; keep it readable
    only by the owning user.
    """
    default_config = {
        "base_save_folder": r"C:\dev\Mail Attachment Exporter\pdfs",
        "email_configs": [
            {'host': 'imap.example.com', 'username': 'user@example.com', 'password': 'password'}
        ]
    }

    if not os.path.exists(config_file):
        with open(config_file, 'w', encoding='utf-8') as file:
            json.dump(default_config, file, indent=4)
        print("Default configuration file created at:", config_file)
        print("Please modify the configuration file with your settings and rerun the script.")
        # exit() is only injected by the site module; raising SystemExit
        # always works and ends the script the same way.
        raise SystemExit(0)

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(config_file, 'r', encoding='utf-8') as file:
        return json.load(file)


if __name__ == "__main__":
    # Entry point: export PDF attachments for every configured account.
    config_file = 'email_config.json'
    config = load_or_create_config(config_file)

    base_save_folder = config['base_save_folder']
    email_configs = config['email_configs']

    # A separate loop name avoids rebinding `config` to a single account.
    for account in email_configs:
        try:
            mail = connect_to_email_server(account['host'], account['username'], account['password'])
        except (imaplib.IMAP4.error, OSError) as error:
            # One unreachable account must not block the remaining ones.
            print(f"Could not connect for {account['username']}: {error}")
            continue

        try:
            fetch_and_save_attachments(mail, account['username'], base_save_folder)
        except (imaplib.IMAP4.error, OSError) as error:
            print(f"Failed to export attachments for {account['username']}: {error}")
        finally:
            # Always end the session, even when fetching failed midway.
            try:
                mail.logout()
                print(f"Logged out from {account['username']}'s email server.")
            except (imaplib.IMAP4.error, OSError) as error:
                print(f"Logout failed for {account['username']}: {error}")