diff --git a/emails.py b/emails.py index 4a6de1a..334b6f3 100755 --- a/emails.py +++ b/emails.py @@ -20,11 +20,13 @@ import sys # SETTINGS ############################################################################### -# The "domain" part in address@domain that we're expecting to see. +# The "domain" part in address@domain for the mailing lists. # All emails addressed to another domain will be ignored. -SERVER_DOMAIN = 'domain.local' +SERVER_DOMAIN = None -REPOSITORIES_PATH = '/home/git/repositories' +# The folder containing the git repositories. +# If using Gitolite, this is the Gitolite's "repositories" folder +REPOSITORIES_PATH = None # Level | Numeric value # ---------|-------------- @@ -39,83 +41,72 @@ logging.basicConfig(filename='/home/git/clif/emails.log', format='[%(asctime)s] %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S%z') +assert SERVER_DOMAIN +assert REPOSITORIES_PATH +############################################################################### -############################################################################### -# ACCEPT/VALIDATE INCOMING EMAIL + + +# Validate incoming email ############################################################################### -# Retrieve the email message from stdin (Postfix has piped this script) +# Retrieve the email message from stdin (piped from Postfix to this script) message_raw = sys.stdin.read() -message = email.message_from_string(message_raw, policy=email.policy.default) try: - email_id = message.get('message-id').strip() -except: - logging.error('Refuting email without a Message-ID: {}'.format(email_subject)) - exit() + message = email.message_from_string(message_raw, policy=email.policy.default) -email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8] # This will be used as thread ID + email_id = message.get('message-id').strip() + email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8] # This will be used as thread ID -try: email_from = email.utils.parseaddr(message.get('from')) assert len(email_from[1]) > 0 -except: - logging.error('Refuting email with From header: {}'.format(email_from)) - exit() -try: email_to = email.utils.parseaddr(message.get('to')) assert len(email_from[1]) > 0 assert email_to[1].endswith('@' + SERVER_DOMAIN) -except: - logging.error('Refuting email with To header: {}'.format(email_to)) - exit() -email_in_reply_to = message.get('in-reply-to') -if email_in_reply_to: - email_in_reply_to = email_in_reply_to.strip() + email_in_reply_to = message.get('in-reply-to') + if email_in_reply_to: + email_in_reply_to = email_in_reply_to.strip() -try: - email_subject = message.get('subject').strip() -except: - email_subject = '' + email_subject = message.get('subject', '').strip() -try: # Accept plaintext only! - email_body = message.get_body(('plain',)).get_content() -except: - email_body = '' + email_body = message.get_body(('plain',)).get_content().strip() -logging.info('Received email from {} to {} with subject "{}"'.format(email_from, email_to, email_subject)) + # Get the repository name. We use email addresses formatted as @SERVER_DOMAIN + repository_name = email_to[1].rsplit('@', 1)[0] -# Get the repository name. We use email addresses formatted as @SERVER_DOMAIN -repository_name = email_to[1].rsplit('@', 1)[0] + repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git') -# Is this a request for subscription? -request_subscribe = repository_name.endswith('+subscribe') -request_unsubscribe = repository_name.endswith('+unsubscribe') + # Repository names must not contain ".." otherwise it would be possible to + # point to folders outside REPOSITORIES_PATH + assert '..' not in repository_name + # Repositories must be / + assert '/' in repository_name -# Remove command from address -if request_subscribe: repository_name = repository_name[:-10] -if request_unsubscribe: repository_name = repository_name[:-12] + assert os.path.isdir(repository_path) -repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git') + logging.info('Received valid email UID:{} From:{} To:{} Subject:"{}"'.format( + uid, email_from, email_to, email_subject)) -if '..' in repository_name: - logging.error('Refuting email because the repository name contains "..": {}'.format(repository_name)) - exit() +except Exception as e: -# All repositories should be / -if '/' not in repository_name: - logging.error('Refuting email because the repository name does not contain a namespace: {}'.format(repository_name)) - exit() + logging.info('Received invalid email UID:{} From:{} To:{} Subject:"{}"'.format( + uid, email_from, email_to, email_subject)) -if not os.path.isdir(repository_path): - logging.error('Repository path does not exist: {}'.format(repository_path)) + logging.info(e) exit() + + + +# Load repository from disk +############################################################################### + try: repo = pygit2.Repository(repository_path) except: @@ -125,7 +116,7 @@ except: try: head_tree = repo.revparse_single('HEAD').tree except: - logging.warning('Could not find HEAD ref: {}'.format(repository_path)) + logging.info('Could not find HEAD ref for repository {}. A new tree will be created.'.format(repository_path)) head_tree = None try: @@ -136,36 +127,38 @@ try: subscribers.append(addr) except: subscribers = [] - logging.info('Subscribers file not found or invalid: {}'.format(repository_path)) + logging.info('Subscribers file not found or invalid for repository {}. A new one will be created.'.format(repository_path)) -############################################################################### -# LISTS SUBSCRIPTION +# Handle subscription requests ############################################################################### -if request_subscribe and (email_from[1] in subscribers): - # Already subscribed - - logging.info('Already subscribed to {}: {}'.format(repository_path, email_from)) - exit() +# Is this a request for subscription? +request_subscribe = email_subject.upper('SUBSCRIBE') +request_unsubscribe = email_subject.upper('UNSUBSCRIBE') -if request_unsubscribe and (email_from[1] not in subscribers): - # No address to remove - - logging.info('Already unsubscribed from {}: {}'.format(repository_path, email_from)) - exit() +if request_subscribe: + # Already subscribed? + if email_from[1] in subscribers: + logging.info('{} already subscribed to {}'.format(email_from, repository_path)) + exit() -if request_subscribe or request_unsubscribe: - if request_subscribe: - subscribers.append(email_from[1]) - commit_message = 'Subscribe' + subscribers.append(email_from[1]) + commit_message = 'Subscribe' - if request_unsubscribe: - subscribers = [ address for address in subscribers if address != email_from[1] ] - commit_message = 'Unsubscribe' +if request_unsubscribe + # Already unsubscribed? + if email_from[1] not in subscribers: + logging.info('{} already unsubscribed from {}'.format(email_from, repository_path)) + exit() + subscribers = [ address for address in subscribers if address != email_from[1] ] + commit_message = 'Unsubscribe' + +# Commit the new list of subscribers to the git repository +if request_subscribe or request_unsubscribe: # Add a new BLOB to the git store oid = repo.create_blob('\n'.join(subscribers).encode('UTF-8')) @@ -182,39 +175,27 @@ if request_subscribe or request_unsubscribe: head_tree_oid, # tree of this commit [] if repo.is_empty else [ repo.head.target ] # parents commit ) - + if request_subscribe: - logging.info('Subscribed to {}: {}'.format(repository_path, email_from)) + logging.info('{} is now subscribed to {}'.format(email_from, repository_path)) if request_unsubscribe: - logging.info('Unsubscribed from {}: {}'.format(repository_path, email_from)) - + logging.info('{} is now unsubscribed from {}'.format(email_from, repository_path)) + exit() -############################################################################### -# ADD EMAIL TO USER REPOSITORY +# If it was not a subscription request, then it's a message. Add it to the +# repository. +# If the email contains the In-Reply-To header, we retrieve the existing tree +# for the thread. Otherwise, we will create a new tree. ############################################################################### -if len(email_subject) == 0: - logging.info('Refuting email with no subject: {}'.format(email_id)) +if not email_body or len(email_body) == 0: + logging.info('Refuting email without plaintext body: {}'.format(email_subject)) exit() -if not email_body: - logging.warning('Refuting email without plaintext body: {}'.format(email_subject)) - exit() - -if len(email_body.strip()) == 0: - logging.info('Refuting email with empty body: {}'.format(email_id)) - exit() - -logging.debug('Accepting email from {} to {} with subject {}'.format(email_from, email_to, email_subject)) - -# At this point we need to add the incoming email to the repository. -# If the email is a reply (ie. it contains the In-Reply-To header, we retrieve the -# existing tree for the thread. Otherwise, we will create a new tree. - thread_tree = None thread_title = '{} {} {}'.format( datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), @@ -225,7 +206,7 @@ thread_title = '{} {} {}'.format( if email_in_reply_to: try: assert head_tree - + # The hash of the email that is being replied to parent_message_hash = hashlib.sha256(email_in_reply_to.encode('utf-8')).hexdigest()[:8] @@ -235,9 +216,9 @@ if email_in_reply_to: thread_tree = obj thread_title = obj.name break - + assert thread_tree - + except: # We only accept emails as reply to existing messages logging.debug('In-Reply-To message ID not found in repository: {}'.format(email_in_reply_to)) @@ -268,8 +249,7 @@ repo.create_commit( -############################################################################### -# FORWARD EMAIL TO THREAD PARTICIPANTS AND TO LIST SUBSCRIBERS +# Forward email to list subscribers ############################################################################### # Remove duplicates, if any @@ -281,7 +261,7 @@ for obj in thread_tree: try: obj_message = email.message_from_string(obj.data.decode('UTF-8'), policy=email.policy.default) obj_email_from = email.utils.parseaddr(obj_message.get('from'))[1] - + if obj_email_from not in participants: participants.append(obj_email_from) except: @@ -291,37 +271,37 @@ for obj in thread_tree: while email_to[1] in participants: participants.remove(email_to[1]) -# Modify some headers before forwarding. -# Note: we need to delete them first because the SMTP client will only accept one -# of them at most, but message[] is an append operator ("message" is an instance of -# email.message.EmailMessage()) -# https://docs.python.org/3/library/email.message.html#email.message.EmailMessage -# TODO Some ISPs add the client IP to the email headers. Should we remove *all* -# unnecessary headers instead? -for header in [ 'Sender', 'Reply-To', - 'List-Id', 'List-Subscribe', 'List-Unsubscribe', 'List-Post' ]: +# Edit some headers before forwarding. +# Note: we need to delete them first because message[] is an append operator +# (the variable "message" is an instance of email.message.EmailMessage()). +for header in [ 'Sender', 'Reply-To', 'Return-Path', 'List-Archive', 'List-Id', + 'List-Subscribe', 'List-Unsubscribe', 'List-Post' ]: del message[header] message['Sender'] = '{}@{}'.format(repository_name, SERVER_DOMAIN) message['Reply-To'] = '{}@{}'.format(repository_name, SERVER_DOMAIN) +# TODO if an email is bounced to this address, it should be removed from the address list +message['Return-Path'] = 'bounces@{}'.format(SERVER_DOMAIN) +message['List-Archive'] = '' message['List-Id'] = '<{}@{}>'.format(repository_name, SERVER_DOMAIN) # message['List-Subscribe'] = '<>' # message['List-Unsubscribe'] = '<>' message['List-Post'] = '<{}@{}>'.format(repository_name, SERVER_DOMAIN) -# Forward email to participants +# Send emails try: smtp_client = smtplib.SMTP('localhost') - - # "The from_addr and to_addrs parameters are used to construct the message envelope - # used by the transport agents. sendmail does not modify the message headers in any way." - # - https://docs.python.org/3/library/smtplib.html#smtplib.SMTP.sendmail + + # From https://docs.python.org/3/library/smtplib.html#smtplib.SMTP.sendmail: + # The from_addr and to_addrs parameters are used to construct the message + # envelope used by the transport agents. sendmail does not modify the message + # headers in any way. smtp_client.sendmail( '{}@{}'.format(repository_name, SERVER_DOMAIN), # Envelope From participants, # Envelope To str(message)) # Message - - logging.debug("Successfully sent emails.") + + logging.debug("Sent email {} to {}".format(email_subject, participants)) except Exception as e: - logging.error("Error sending emails.") + logging.debug("Cannot send email {} to {}".format(email_subject, participants)) logging.error(str(e))