home » zplus/clif.git
ID: fea03fba7c16146e41194cbc0dfc27a6004e0974
307 lines — 11K — View raw


#!/home/git/clif/venv/bin/python

###############################################################################
# This script is called by Postfix every time it receives an email.
# This script will accept incoming emails and add them to the mailing lists repositories.
###############################################################################

import datetime
import email
import email.policy
import hashlib
import logging
import os
import pygit2
import smtplib
import sys


###############################################################################
# SETTINGS
###############################################################################

# The "domain" part in address@domain for the mailing lists.
# All emails addressed to another domain will be ignored.
SERVER_DOMAIN = os.environ.get('SERVER_DOMAIN')

# The folder containing the git repositories.
# If using Gitolite, this is the Gitolite's "repositories" folder
REPOSITORIES_PATH = os.environ.get('REPOSITORIES_PATH')

# Log everything (NOTSET on the root logger lets every record through).
#
# Level    | Numeric value
# ---------|--------------
# CRITICAL | 50
# ERROR    | 40
# WARNING  | 30
# INFO     | 20
# DEBUG    | 10
# NOTSET   |  0
logging.basicConfig(filename='/home/git/clif/emails.log',
                    level=logging.NOTSET,
                    format='[%(asctime)s] %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S%z')

# Refuse to run with an incomplete configuration. This is an explicit check
# rather than an "assert" because asserts are removed when Python runs with
# -O, and because a log entry is easier to diagnose than a bare traceback.
# The exit status (1) is the same one an uncaught AssertionError would produce.
missing_settings = [
    name
    for name, value in (('SERVER_DOMAIN', SERVER_DOMAIN),
                        ('REPOSITORIES_PATH', REPOSITORIES_PATH))
    if not value
]
if missing_settings:
    logging.critical('Missing required environment variable(s): {}'.format(
        ', '.join(missing_settings)))
    sys.exit(1)

###############################################################################




# Validate incoming email
###############################################################################

# Retrieve the email message from stdin (piped from Postfix to this script)
message_raw = sys.stdin.read()

# Pre-bind every value printed by the "invalid email" log line, so the
# exception handler below cannot itself fail with a NameError when parsing
# stops before one of them has been assigned.
email_id = None
email_from = None
email_to = None
email_subject = None

try:
    message = email.message_from_string(message_raw, policy=email.policy.default)

    email_id = message.get('message-id')
    if email_id is None:
        raise ValueError('Missing Message-ID header')
    email_id = email_id.strip()
    # The first 8 hex digits of the hashed Message-ID are used as thread ID
    email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8]

    # parseaddr() returns a (realname, address) tuple
    email_from = email.utils.parseaddr(message.get('from'))
    if not email_from[1]:
        raise ValueError('Missing or unparsable From address')

    email_to = email.utils.parseaddr(message.get('to'))
    if not email_to[1]:
        raise ValueError('Missing or unparsable To address')
    if not email_to[1].endswith('@' + SERVER_DOMAIN):
        raise ValueError('Recipient is not in domain {}'.format(SERVER_DOMAIN))

    email_in_reply_to = message.get('in-reply-to')
    if email_in_reply_to:
        email_in_reply_to = email_in_reply_to.strip()

    email_subject = message.get('subject', '').strip()

    # Accept plaintext only!
    plain_part = message.get_body(('plain',))
    if plain_part is None:
        raise ValueError('Email has no plaintext body')
    email_body = plain_part.get_content().strip()

    # Get the repository name. We use email addresses formatted as <repository>@SERVER_DOMAIN
    repository_name = email_to[1].rsplit('@', 1)[0]

    # The name is validated BEFORE it is turned into a path, with explicit
    # checks instead of asserts (asserts vanish under "python -O", which
    # would silently disable these security checks).

    # Repository names must not contain ".." otherwise it would be possible to
    # point to folders outside REPOSITORIES_PATH
    if '..' in repository_name:
        raise ValueError('Repository name must not contain ".."')
    # An absolute name would make os.path.join() discard REPOSITORIES_PATH
    if os.path.isabs(repository_name):
        raise ValueError('Repository name must not be an absolute path')
    # Repositories must be <username>/<reponame>
    if '/' not in repository_name:
        raise ValueError('Repository name must be <username>/<reponame>')

    repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git')

    if not os.path.isdir(repository_path):
        raise ValueError('Repository not found: {}'.format(repository_path))

    logging.info('Received valid email UID:{} From:{} To:{} Subject:"{}"'.format(
        email_id, email_from, email_to, email_subject))

except Exception as e:

    # Invalid emails are logged and dropped; the script still exits with
    # status 0.
    logging.info('Received invalid email UID:{} From:{} To:{} Subject:"{}"'.format(
        email_id, email_from, email_to, email_subject))

    logging.info(e)
    sys.exit()




# Load repository from disk
###############################################################################

# Open the mailing list repository. "except Exception" (rather than a bare
# "except:") is used throughout so that SystemExit and KeyboardInterrupt are
# never swallowed.
try:
    repo = pygit2.Repository(repository_path)
except Exception as e:
    logging.error('Not a valid repository: {}'.format(repository_path))
    logging.error(e)
    sys.exit()

# The tree of the latest commit, or None when HEAD cannot be resolved
# (for example because the repository has no commits yet).
try:
    head_tree = repo.revparse_single('HEAD').tree
except Exception:
    logging.info('Could not find HEAD ref for repository {}. A new tree will be created.'.format(repository_path))
    head_tree = None

# The list of subscribers is stored in the "subscribers" file at the root of
# the HEAD tree, one address per line. Blank lines are ignored.
# When head_tree is None the lookup below raises and we fall back to an empty
# list, exactly like when the file does not exist.
try:
    subscribers_blob = head_tree['subscribers']
    stripped_lines = (line.strip() for line in subscribers_blob.data.decode('UTF-8').splitlines())
    subscribers = [addr for addr in stripped_lines if addr]
except Exception:
    subscribers = []
    logging.info('Subscribers file not found or invalid for repository {}. A new one will be created.'.format(repository_path))




# Handle subscription requests
###############################################################################

# Is this a request for subscription?
# A request is an email whose subject is exactly SUBSCRIBE or UNSUBSCRIBE
# (case-insensitive; email_subject has already been stripped of whitespace).
# NOTE(review): exact match is the assumed intent of the original code,
# which called str.upper() with an argument and raised TypeError — confirm.
subject_upper = email_subject.upper()
request_subscribe   = subject_upper == 'SUBSCRIBE'
request_unsubscribe = subject_upper == 'UNSUBSCRIBE'

if request_subscribe:
    # Already subscribed?
    if email_from[1] in subscribers:
        logging.info('{} already subscribed to {}'.format(email_from, repository_path))
        sys.exit()

    subscribers.append(email_from[1])
    commit_message = 'Subscribe'

if request_unsubscribe:
    # Already unsubscribed?
    if email_from[1] not in subscribers:
        logging.info('{} already unsubscribed from {}'.format(email_from, repository_path))
        sys.exit()

    subscribers = [ address for address in subscribers if address != email_from[1] ]
    commit_message = 'Unsubscribe'

# Commit the new list of subscribers to the git repository
if request_subscribe or request_unsubscribe:
    # Add a new BLOB to the git store (one address per line)
    oid = repo.create_blob('\n'.join(subscribers).encode('UTF-8'))

    # Add the blob that we've just created to the HEAD tree, replacing any
    # previous "subscribers" entry. Start from an empty tree if there is no HEAD.
    head_tree_builder = repo.TreeBuilder(head_tree) if head_tree else repo.TreeBuilder()
    head_tree_builder.insert('subscribers', oid, pygit2.GIT_FILEMODE_BLOB)
    head_tree_oid = head_tree_builder.write()

    repo.create_commit(
        'HEAD',                             # reference name
        pygit2.Signature('CLIF', '-'),      # author
        pygit2.Signature('CLIF', '-'),      # committer
        commit_message,                     # message
        head_tree_oid,                      # tree of this commit
        [] if repo.is_empty else [ repo.head.target ] # parents commit
    )

    if request_subscribe:
        logging.info('{} is now subscribed to {}'.format(email_from, repository_path))
    if request_unsubscribe:
        logging.info('{} is now unsubscribed from {}'.format(email_from, repository_path))

    # Subscription requests are never archived or forwarded to the list
    sys.exit()




# If it was not a subscription request, then it's a message. Add it to the
# repository.
# If the email contains the In-Reply-To header, we retrieve the existing tree
# for the thread. Otherwise, we will create a new tree.
###############################################################################

# email_body is the stripped plaintext part; an empty string means there is
# nothing to archive.
if not email_body:
    logging.info('Refuting email without plaintext body: {}'.format(email_subject))
    sys.exit()

# By default the email starts a new thread. The thread is a tree named
# "<UTC timestamp> <message hash> <subject>"; slashes are replaced because
# they are not allowed in tree entry names.
thread_tree = None
thread_title = '{} {} {}'.format(
    datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
    email_id_hash,
    email_subject.replace('/', '-')
)

if email_in_reply_to:
    try:
        # The hash of the email that is being replied to
        parent_message_hash = hashlib.sha256(email_in_reply_to.encode('utf-8')).hexdigest()[:8]
        parent_entry_name = parent_message_hash + '.email'

        # Find the thread (tree) containing the parent message.
        # Only the top-level trees of HEAD are searched.
        if head_tree:
            for obj in head_tree:
                if obj.type_str == 'tree' and parent_entry_name in obj:
                    thread_tree = obj
                    thread_title = obj.name
                    break
    except Exception as e:
        # Any failure during the lookup is treated as "thread not found"
        logging.debug(e)
        thread_tree = None

    if thread_tree is None:
        # We only accept emails as reply to existing messages
        logging.debug('In-Reply-To message ID not found in repository: {}'.format(email_in_reply_to))
        sys.exit()

# Add the new email BLOB to the git store.
# The raw message is encoded explicitly: create_blob() is documented to take
# bytes, and the archive is read back as UTF-8 elsewhere in this script.
message_oid = repo.create_blob(message_raw.encode('UTF-8'))

# Add the blob that we've just created to the thread tree
thread_tree_builder = repo.TreeBuilder(thread_tree) if thread_tree else repo.TreeBuilder()
thread_tree_builder.insert(email_id_hash + '.email', message_oid, pygit2.GIT_FILEMODE_BLOB)
thread_tree_oid = thread_tree_builder.write()

# Add the thread tree to the HEAD tree
head_tree_builder = repo.TreeBuilder(head_tree) if head_tree else repo.TreeBuilder()
head_tree_builder.insert(thread_title, thread_tree_oid, pygit2.GIT_FILEMODE_TREE)
head_tree_oid = head_tree_builder.write()

repo.create_commit(
    'HEAD',                             # reference name
    pygit2.Signature('CLIF', '-'),      # author
    pygit2.Signature('CLIF', '-'),      # committer
    'New email.',                       # message
    head_tree_oid,                      # tree of this commit
    [] if repo.is_empty else [ repo.head.target ] # parents commit
)




# Forward email to list subscribers
###############################################################################

# Remove duplicates, if any (dict.fromkeys keeps the original order, which
# makes the recipient list and the log output deterministic)
participants = list(dict.fromkeys(subscribers))

# Find all the participants in the thread, ie. everyone that has sent an email
thread_tree = repo.get(thread_tree_oid)
for obj in thread_tree:
    try:
        obj_message = email.message_from_string(obj.data.decode('UTF-8'), policy=email.policy.default)
        obj_email_from = email.utils.parseaddr(obj_message.get('from'))[1]
    except Exception:
        logging.warning('Could not parse file for searching participants: {}'.format(obj.name))
        continue

    # parseaddr() yields an empty address when the From header is unusable;
    # never add that as a recipient.
    if obj_email_from and obj_email_from not in participants:
        participants.append(obj_email_from)

# Remove list address from participants in order to avoid forwarding to us
participants = [ address for address in participants if address != email_to[1] ]

# Edit some headers before forwarding.
# Note: we need to delete them first because message[] is an append operator
# (the variable "message" is an instance of email.message.EmailMessage()).
for header in [ 'Sender', 'Reply-To', 'Return-Path', 'List-Archive', 'List-Id',
                'List-Subscribe', 'List-Unsubscribe', 'List-Post' ]:
    del message[header]

list_address = '{}@{}'.format(repository_name, SERVER_DOMAIN)

message['Sender'] = list_address
message['Reply-To'] = list_address
# TODO if an email is bounced to this address, it should be removed from the address list
message['Return-Path'] = 'bounces@{}'.format(SERVER_DOMAIN)
message['List-Archive'] = ''
message['List-Id'] = '<{}>'.format(list_address)
# message['List-Subscribe'] = '<>'
# message['List-Unsubscribe'] = '<>'
message['List-Post'] = '<{}>'.format(list_address)

# Send emails
if not participants:
    # sendmail()/send_message() raise when given no recipients at all
    logging.debug("No recipients for email {}, nothing to forward".format(email_subject))
else:
    try:
        # The context manager closes the connection (QUIT) even on errors
        with smtplib.SMTP('localhost') as smtp_client:
            # send_message() serializes the message to bytes with CRLF line
            # endings and properly encoded headers. Passing str(message) to
            # sendmail() would instead be encoded as ASCII and fail for any
            # email containing non-ASCII characters.
            # The explicit from_addr/to_addrs are used for the envelope only;
            # the From/To headers of the message are left untouched.
            refused = smtp_client.send_message(
                message,
                from_addr=list_address,    # Envelope From
                to_addrs=participants)     # Envelope To

        # A non-empty result maps each refused recipient to the server reply
        if refused:
            logging.warning("Some recipients were refused for email {}: {}".format(email_subject, refused))

        logging.debug("Sent email {} to {}".format(email_subject, participants))
    except Exception as e:
        logging.debug("Cannot send email {} to {}".format(email_subject, participants))
        logging.error(str(e))