home » zplus/clif.git
ID: 4a6de1a7832907443bb69b38c14de6238c49018d
327 lines — 11K — View raw


#!/home/git/clif/venv/bin/python

###############################################################################
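# This script is called by Postfix every time it receives an email; the message is read from stdin.
# It validates the incoming email, handles subscribe/unsubscribe requests, stores the email
# in the matching mailing-list repository and forwards it to the list participants.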
###############################################################################

import datetime
import email
import email.policy
import hashlib
import logging
import os
import pygit2
import smtplib
import sys


###############################################################################
# SETTINGS
###############################################################################

# Mail domain served by this script. Recipients are expected to look like
# <something>@SERVER_DOMAIN; mail addressed to any other domain is ignored.
SERVER_DOMAIN = 'domain.local'

# Base directory that mailing-list repository paths are resolved against.
REPOSITORIES_PATH = '/home/git/repositories'

# Log to a file. NOTSET is the lowest level, so no record is filtered out.
# Numeric values of the standard levels, for reference:
#
#   CRITICAL  50
#   ERROR     40
#   WARNING   30
#   INFO      20
#   DEBUG     10
#   NOTSET     0
logging.basicConfig(
    filename='/home/git/clif/emails.log',
    level=logging.NOTSET,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S%z',
)




###############################################################################
# ACCEPT/VALIDATE INCOMING EMAIL
###############################################################################

# Read the whole message from stdin (Postfix pipes every incoming email to this script)
message_raw = sys.stdin.read()
message = email.message_from_string(message_raw, policy=email.policy.default)

# The subject is extracted first because the rejection messages below mention it.
# A missing or unreadable subject is not fatal at this stage; it becomes ''.
try:
    email_subject = message.get('subject', '').strip()
except Exception:
    email_subject = ''

# A Message-ID is mandatory: its hash identifies the email inside the repository.
try:
    email_id = message.get('message-id', '').strip()
except Exception:
    email_id = ''

if not email_id:
    logging.error('Refuting email without a Message-ID: {}'.format(email_subject))
    sys.exit()

email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8] # This will be used as thread ID

# Sender: parseaddr() returns a (realname, address) tuple; the address must not be empty.
# The tuple always gets a value, so the log call below cannot hit an unbound name.
try:
    email_from = email.utils.parseaddr(message.get('from', ''))
except Exception:
    email_from = ('', '')

if not email_from[1]:
    logging.error('Refuting email with From header: {}'.format(email_from))
    sys.exit()

# Recipient: must be an address at SERVER_DOMAIN. The endswith() test also rejects
# an empty address, so no separate emptiness check is needed.
try:
    email_to = email.utils.parseaddr(message.get('to', ''))
except Exception:
    email_to = ('', '')

if not email_to[1].endswith('@' + SERVER_DOMAIN):
    logging.error('Refuting email with To header: {}'.format(email_to))
    sys.exit()

# Optional: Message-ID of the email this one replies to (None/empty when not a reply)
email_in_reply_to = message.get('in-reply-to')
if email_in_reply_to:
    email_in_reply_to = email_in_reply_to.strip()

try:
    # Accept plaintext only! get_body() returns None when there is no text/plain
    # part, which lands in the handler below and yields an empty body.
    email_body = message.get_body(('plain',)).get_content()
except Exception:
    email_body = ''

logging.info('Received email from {} to {} with subject "{}"'.format(email_from, email_to, email_subject))

# Get the repository name. We use email addresses formatted as <repository>@SERVER_DOMAIN
repository_name = email_to[1].rsplit('@', 1)[0]

# Is this a request for subscription? Commands are given as a suffix of the local part.
request_subscribe   = repository_name.endswith('+subscribe')
request_unsubscribe = repository_name.endswith('+unsubscribe')

# Remove command from address
if request_subscribe:
    repository_name = repository_name[:-len('+subscribe')]
if request_unsubscribe:
    repository_name = repository_name[:-len('+unsubscribe')]

if '..' in repository_name:
    logging.error('Refuting email because the repository name contains "..": {}'.format(repository_name))
    sys.exit()

# os.path.join() discards REPOSITORIES_PATH when the second component is absolute,
# so a name starting with "/" would point outside of the repositories directory.
if os.path.isabs(repository_name):
    logging.error('Refuting email because the repository name is an absolute path: {}'.format(repository_name))
    sys.exit()

# All repositories should be <username>/<reponame>
if '/' not in repository_name:
    logging.error('Refuting email because the repository name does not contain a namespace: {}'.format(repository_name))
    sys.exit()

# The path is only built once the name has passed all the checks above
repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git')

if not os.path.isdir(repository_path):
    logging.error('Repository path does not exist: {}'.format(repository_path))
    sys.exit()

try:
    repo = pygit2.Repository(repository_path)
except Exception:
    logging.error('Not a valid repository: {}'.format(repository_path))
    sys.exit()

# A repository without a resolvable HEAD is still accepted: head_tree is None
# and the code further down starts from an empty tree.
try:
    head_tree = repo.revparse_single('HEAD').tree
except Exception:
    logging.warning('Could not find HEAD ref: {}'.format(repository_path))
    head_tree = None

# The "subscribers" entry of the HEAD tree holds one address per line; blank lines
# are skipped. Any failure (no HEAD tree, no such entry, undecodable content)
# results in an empty subscribers list.
try:
    subscribers_lines = head_tree['subscribers'].data.decode('UTF-8').splitlines()
    stripped_lines = (line.strip() for line in subscribers_lines)
    subscribers = [addr for addr in stripped_lines if addr]
except Exception:
    subscribers = []
    logging.info('Subscribers file not found or invalid: {}'.format(repository_path))




###############################################################################
# LISTS SUBSCRIPTION
###############################################################################

if request_subscribe or request_unsubscribe:
    # Only the bare address of the sender is compared and stored
    sender_address = email_from[1]
    is_subscribed = sender_address in subscribers

    if request_subscribe and is_subscribed:
        # Nothing to add
        logging.info('Already subscribed to {}: {}'.format(repository_path, email_from))
        exit()

    if request_unsubscribe and not is_subscribed:
        # Nothing to remove
        logging.info('Already unsubscribed from {}: {}'.format(repository_path, email_from))
        exit()

    if request_subscribe:
        subscribers.append(sender_address)
        commit_message = 'Subscribe'

    if request_unsubscribe:
        subscribers = list(filter(lambda address: address != sender_address, subscribers))
        commit_message = 'Unsubscribe'

    # Store the updated list (one address per line) as a new blob
    subscribers_text = '\n'.join(subscribers)
    oid = repo.create_blob(subscribers_text.encode('UTF-8'))

    # Put the new blob into a copy of the HEAD tree, or into a fresh tree
    # when there is no usable HEAD tree
    if head_tree:
        head_tree_builder = repo.TreeBuilder(head_tree)
    else:
        head_tree_builder = repo.TreeBuilder()
    head_tree_builder.insert('subscribers', oid, pygit2.GIT_FILEMODE_BLOB)
    head_tree_oid = head_tree_builder.write()

    # Commit on top of the current HEAD; an empty repository has no parent
    parent_commits = [] if repo.is_empty else [ repo.head.target ]
    author = pygit2.Signature('CLIF', '-')
    committer = pygit2.Signature('CLIF', '-')
    repo.create_commit('HEAD', author, committer, commit_message, head_tree_oid, parent_commits)

    if request_subscribe:
        logging.info('Subscribed to {}: {}'.format(repository_path, email_from))
    if request_unsubscribe:
        logging.info('Unsubscribed from {}: {}'.format(repository_path, email_from))

    # Subscription requests are never stored or forwarded as list emails
    exit()




###############################################################################
# ADD EMAIL TO USER REPOSITORY
###############################################################################

if not email_subject:
    logging.info('Refuting email with no subject: {}'.format(email_id))
    sys.exit()

if not email_body:
    logging.warning('Refuting email without plaintext body: {}'.format(email_subject))
    sys.exit()

if not email_body.strip():
    logging.info('Refuting email with empty body: {}'.format(email_id))
    sys.exit()

logging.debug('Accepting email from {} to {} with subject {}'.format(email_from, email_to, email_subject))

# At this point we need to add the incoming email to the repository.
# If the email is a reply (ie. it contains the In-Reply-To header), we retrieve the
# existing tree for the thread. Otherwise, we will create a new tree.

thread_tree = None

# Title used for a new thread: UTC timestamp, hash of the Message-ID and the subject.
# "/" is replaced because the title is used as the name of a tree entry.
thread_title = '{} {} {}'.format(
    datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
    email_id_hash,
    email_subject.replace('/', '-')
)

if email_in_reply_to:
    # Explicit checks are used instead of "assert": assertions are removed when
    # Python runs with -O, and a reply to an unknown message would then be
    # stored as a brand new thread.
    try:
        # The hash of the email that is being replied to
        parent_message_hash = hashlib.sha256(email_in_reply_to.encode('utf-8')).hexdigest()[:8]
        parent_entry_name = parent_message_hash + '.email'

        # Find the thread (tree) containing the parent message.
        # A missing or empty HEAD tree simply yields no candidates.
        for obj in head_tree or ():
            if obj.type_str == 'tree' and parent_entry_name in obj:
                thread_tree = obj
                thread_title = obj.name
                break
    except Exception:
        # Any failure during the lookup is handled like "thread not found"
        thread_tree = None

    if thread_tree is None:
        # We only accept emails as reply to existing messages
        logging.debug('In-Reply-To message ID not found in repository: {}'.format(email_in_reply_to))
        sys.exit()

# Add the new email BLOB to the git store
# NOTE(review): message_raw is a str here, while the subscribers blob is created
# from bytes - confirm that the installed pygit2 accepts str for create_blob().
message_oid = repo.create_blob(message_raw)

# Add the blob that we've just created to the thread tree
thread_tree_builder = repo.TreeBuilder(thread_tree) if thread_tree else repo.TreeBuilder()
thread_tree_builder.insert(email_id_hash + '.email', message_oid, pygit2.GIT_FILEMODE_BLOB)
thread_tree_oid = thread_tree_builder.write()

# Add the thread tree to the HEAD tree
head_tree_builder = repo.TreeBuilder(head_tree) if head_tree else repo.TreeBuilder()
head_tree_builder.insert(thread_title, thread_tree_oid, pygit2.GIT_FILEMODE_TREE)
head_tree_oid = head_tree_builder.write()

repo.create_commit(
    'HEAD',                             # reference name
    pygit2.Signature('CLIF', '-'),      # author
    pygit2.Signature('CLIF', '-'),      # committer
    'New email.',                       # message
    head_tree_oid,                      # tree of this commit
    [] if repo.is_empty else [ repo.head.target ] # parents commit
)




###############################################################################
# FORWARD EMAIL TO THREAD PARTICIPANTS AND TO LIST SUBSCRIBERS
###############################################################################

# Start from the list subscribers, without duplicates (first occurrence wins)
participants = list(dict.fromkeys(subscribers))

# Find all the participants in the thread, ie. everyone that has sent an email
thread_tree = repo.get(thread_tree_oid)
for obj in thread_tree:
    try:
        obj_message = email.message_from_string(obj.data.decode('UTF-8'), policy=email.policy.default)
        obj_email_from = email.utils.parseaddr(obj_message.get('from', ''))[1]
    except Exception:
        logging.warning('Could not parse file for searching participants: {}'.format(obj.name))
        continue

    # An empty address (no usable From header) is not a valid recipient
    if obj_email_from and obj_email_from not in participants:
        participants.append(obj_email_from)

# Remove list address from participants in order to avoid forwarding to us
participants = [ address for address in participants if address != email_to[1] ]

# Modify some headers before forwarding.
# Note: we need to delete them first because the SMTP client will only accept one
# of them at most, but message[] is an append operator ("message" is an instance of
# email.message.EmailMessage())
# https://docs.python.org/3/library/email.message.html#email.message.EmailMessage
# TODO Some ISPs add the client IP to the email headers. Should we remove *all*
#      unnecessary headers instead?
for header in [ 'Sender', 'Reply-To',
                'List-Id', 'List-Subscribe', 'List-Unsubscribe', 'List-Post' ]:
    del message[header]

list_address = '{}@{}'.format(repository_name, SERVER_DOMAIN)

message['Sender'] = list_address
message['Reply-To'] = list_address
message['List-Id'] = '<{}>'.format(list_address)
# message['List-Subscribe'] = '<>'
# message['List-Unsubscribe'] = '<>'
message['List-Post'] = '<{}>'.format(list_address)

# Forward email to participants
if not participants:
    # sendmail() raises when it is given no recipients, so there is nothing to do here
    logging.debug("No participants to forward the email to.")
else:
    try:
        # The context manager closes the SMTP session even when sending fails
        with smtplib.SMTP('localhost') as smtp_client:
            # "The from_addr and to_addrs parameters are used to construct the message envelope
            # used by the transport agents. sendmail does not modify the message headers in any way."
            #    - https://docs.python.org/3/library/smtplib.html#smtplib.SMTP.sendmail
            smtp_client.sendmail(
                list_address,      # Envelope From
                participants,      # Envelope To
                str(message))      # Message

        logging.debug("Successfully sent emails.")
    except Exception as e:
        logging.error("Error sending emails.")
        logging.error(str(e))