#!/home/git/clif/venv/bin/python
###############################################################################
# This script is called by Postfix every time it receives an email.
# This script will accept incoming emails and add them to the mailing lists repositories.
###############################################################################
import datetime
import email
import email.policy
import hashlib
import logging
import os
import pygit2
import smtplib
import sys
###############################################################################
# SETTINGS
###############################################################################
# The "domain" part in address@domain for the mailing lists.
# All emails addressed to another domain will be ignored.
SERVER_DOMAIN = os.environ.get('SERVER_DOMAIN')
# The folder containing the git repositories.
# If using Gitolite, this is Gitolite's "repositories" folder.
REPOSITORIES_PATH = os.environ.get('REPOSITORIES_PATH')
# Level    | Numeric value
# ---------|--------------
# CRITICAL | 50
# ERROR    | 40
# WARNING  | 30
# INFO     | 20
# DEBUG    | 10
# NOTSET   | 0
logging.basicConfig(filename='/home/git/clif/emails.log',
                    level=logging.NOTSET,
                    format='[%(asctime)s] %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S%z')
# Both settings are mandatory. They are checked explicitly (rather than with
# "assert", which is removed when Python runs with -O) so that a missing
# setting is always reported and the script stops with a non-zero exit status.
for _setting_name, _setting_value in (('SERVER_DOMAIN', SERVER_DOMAIN),
                                      ('REPOSITORIES_PATH', REPOSITORIES_PATH)):
    if not _setting_value:
        logging.critical('Missing required environment variable: %s', _setting_name)
        sys.exit('Missing required environment variable: {}'.format(_setting_name))
###############################################################################
# Validate incoming email
###############################################################################
# Retrieve the email message from stdin (piped from Postfix to this script)
message_raw = sys.stdin.read()


def _require(condition, reason):
    """Raise ValueError(reason) unless condition is truthy.

    Used instead of "assert" so that validation still runs under "python -O"
    and so that the log records why an email was rejected.
    """
    if not condition:
        raise ValueError(reason)


# Give every value reported by the log lines below a placeholder, so that the
# failure handler can always format its message however early validation fails.
email_id = email_from = email_to = email_subject = None
try:
    message = email.message_from_string(message_raw, policy=email.policy.default)
    email_id = (message.get('message-id') or '').strip()
    _require(email_id, 'missing Message-ID header')
    # This will be used as thread ID
    email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8]
    email_from = email.utils.parseaddr(message.get('from', ''))
    _require(email_from[1], 'missing or invalid From address')
    email_to = email.utils.parseaddr(message.get('to', ''))
    _require(email_to[1], 'missing or invalid To address')
    _require(email_to[1].endswith('@' + SERVER_DOMAIN),
             'recipient is not addressed to {}'.format(SERVER_DOMAIN))
    email_in_reply_to = message.get('in-reply-to')
    if email_in_reply_to:
        email_in_reply_to = email_in_reply_to.strip()
    email_subject = message.get('subject', '').strip()
    # Accept plaintext only!
    plain_part = message.get_body(('plain',))
    _require(plain_part is not None, 'no plaintext body')
    email_body = plain_part.get_content().strip()
    # Get the repository name. We use email addresses formatted as <repository>@SERVER_DOMAIN
    repository_name = email_to[1].rsplit('@', 1)[0]
    # Repository names must not contain ".." and must not be absolute paths
    # (os.path.join discards REPOSITORIES_PATH when the second component starts
    # with "/"), otherwise it would be possible to point to folders outside
    # REPOSITORIES_PATH.
    _require('..' not in repository_name, 'repository name contains ".."')
    _require(not os.path.isabs(repository_name), 'repository name is an absolute path')
    # Repositories must be <username>/<reponame>
    _require('/' in repository_name, 'repository name is not <username>/<reponame>')
    repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git')
    _require(os.path.isdir(repository_path), 'repository not found: {}'.format(repository_path))
    logging.info('Received valid email UID:{} From:{} To:{} Subject:"{}"'.format(
        email_id, email_from, email_to, email_subject))
except Exception as e:
    logging.info('Received invalid email UID:{} From:{} To:{} Subject:"{}"'.format(
        email_id, email_from, email_to, email_subject))
    logging.info(e)
    # Exit with status 0: the email has been handled (by discarding it).
    sys.exit()
# Load repository from disk
###############################################################################
# Open the mailing list repository. "except Exception" is used instead of a
# bare "except" so that SystemExit/KeyboardInterrupt are never swallowed.
try:
    repo = pygit2.Repository(repository_path)
except Exception:
    logging.error('Not a valid repository: {}'.format(repository_path))
    sys.exit()
# The tree of the latest commit, or None when HEAD cannot be resolved.
try:
    head_tree = repo.revparse_single('HEAD').tree
except Exception:
    logging.info('Could not find HEAD ref for repository {}. A new tree will be created.'.format(repository_path))
    head_tree = None
# The list of subscribers is read from the "subscribers" entry of the HEAD
# tree, one address per line. Blank lines are ignored.
subscribers = []
try:
    # When head_tree is None the lookup raises and we fall back to an empty list.
    subscribers_text = head_tree['subscribers'].data.decode('UTF-8')
except Exception:
    logging.info('Subscribers file not found or invalid for repository {}. A new one will be created.'.format(repository_path))
else:
    subscribers = [line.strip() for line in subscribers_text.splitlines() if line.strip()]
# Handle subscription requests
###############################################################################
# Is this a request for subscription?
# A subscription request is an email whose subject is exactly "SUBSCRIBE" or
# "UNSUBSCRIBE" (case-insensitive).
subject_command = email_subject.upper()
request_subscribe = subject_command == 'SUBSCRIBE'
request_unsubscribe = subject_command == 'UNSUBSCRIBE'
if request_subscribe:
    # Already subscribed?
    if email_from[1] in subscribers:
        logging.info('{} already subscribed to {}'.format(email_from, repository_path))
        sys.exit()
    subscribers.append(email_from[1])
    commit_message = 'Subscribe'
elif request_unsubscribe:
    # Already unsubscribed?
    if email_from[1] not in subscribers:
        logging.info('{} already unsubscribed from {}'.format(email_from, repository_path))
        sys.exit()
    subscribers = [address for address in subscribers if address != email_from[1]]
    commit_message = 'Unsubscribe'
# Commit the new list of subscribers to the git repository
if request_subscribe or request_unsubscribe:
    # Add a new BLOB to the git store
    oid = repo.create_blob('\n'.join(subscribers).encode('UTF-8'))
    # Add the blob that we've just created to the HEAD tree
    head_tree_builder = repo.TreeBuilder(head_tree) if head_tree else repo.TreeBuilder()
    head_tree_builder.insert('subscribers', oid, pygit2.GIT_FILEMODE_BLOB)
    head_tree_oid = head_tree_builder.write()
    repo.create_commit(
        'HEAD',                                       # reference name
        pygit2.Signature('CLIF', '-'),                # author
        pygit2.Signature('CLIF', '-'),                # committer
        commit_message,                               # message
        head_tree_oid,                                # tree of this commit
        [] if repo.is_empty else [repo.head.target]   # parents commit
    )
    if request_subscribe:
        logging.info('{} is now subscribed to {}'.format(email_from, repository_path))
    if request_unsubscribe:
        logging.info('{} is now unsubscribed from {}'.format(email_from, repository_path))
    # Subscription requests are not stored or forwarded as list messages.
    sys.exit()
# If it was not a subscription request, then it's a message. Add it to the
# repository.
# If the email contains the In-Reply-To header, we retrieve the existing tree
# for the thread. Otherwise, we will create a new tree.
###############################################################################
if not email_body:
    logging.info('Refuting email without plaintext body: {}'.format(email_subject))
    sys.exit()
# Defaults for a message that starts a new thread: no existing tree, and a
# title built from the current UTC time, the message hash and the subject
# ("/" is replaced because the title is used as a tree entry name).
thread_tree = None
thread_title = '{} {} {}'.format(
    datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
    email_id_hash,
    email_subject.replace('/', '-')
)
if email_in_reply_to:
    # The hash of the email that is being replied to
    parent_message_hash = hashlib.sha256(email_in_reply_to.encode('utf-8')).hexdigest()[:8]
    parent_blob_name = parent_message_hash + '.email'
    # Find the thread (tree) containing the parent message. The lookup uses
    # explicit checks rather than "assert", which is removed under "python -O".
    try:
        for obj in head_tree or ():
            if obj.type_str == 'tree' and parent_blob_name in obj:
                thread_tree = obj
                thread_title = obj.name
                break
    except Exception:
        # Treat any failure while scanning the tree as "thread not found".
        thread_tree = None
    if not thread_tree:
        # We only accept emails as reply to existing messages
        logging.debug('In-Reply-To message ID not found in repository: {}'.format(email_in_reply_to))
        sys.exit()
# Add the new email BLOB to the git store. The text is encoded explicitly as
# UTF-8, which is the encoding used when stored emails are read back.
message_oid = repo.create_blob(message_raw.encode('UTF-8'))
# Add the blob that we've just created to the thread tree
thread_tree_builder = repo.TreeBuilder(thread_tree) if thread_tree else repo.TreeBuilder()
thread_tree_builder.insert(email_id_hash + '.email', message_oid, pygit2.GIT_FILEMODE_BLOB)
thread_tree_oid = thread_tree_builder.write()
# Add the thread tree to the HEAD tree
head_tree_builder = repo.TreeBuilder(head_tree) if head_tree else repo.TreeBuilder()
head_tree_builder.insert(thread_title, thread_tree_oid, pygit2.GIT_FILEMODE_TREE)
head_tree_oid = head_tree_builder.write()
repo.create_commit(
    'HEAD',                                       # reference name
    pygit2.Signature('CLIF', '-'),                # author
    pygit2.Signature('CLIF', '-'),                # committer
    'New email.',                                 # message
    head_tree_oid,                                # tree of this commit
    [] if repo.is_empty else [repo.head.target]   # parents commit
)
# Forward email to list subscribers
###############################################################################
# Remove duplicates, if any
participants = list(set(subscribers))
# Find all the participants in the thread, ie. everyone that has sent an email
thread_tree = repo.get(thread_tree_oid)
for obj in thread_tree:
    try:
        obj_message = email.message_from_string(obj.data.decode('UTF-8'), policy=email.policy.default)
        obj_email_from = email.utils.parseaddr(obj_message.get('from'))[1]
    except Exception:
        logging.warning('Could not parse file for searching participants: {}'.format(obj.name))
        continue
    # Skip empty addresses: parseaddr returns '' when no address can be parsed.
    if obj_email_from and obj_email_from not in participants:
        participants.append(obj_email_from)
# Remove list address from participants in order to avoid forwarding to us
participants = [address for address in participants if address != email_to[1]]
# Edit some headers before forwarding.
# Note: we need to delete them first because message[] is an append operator
# (the variable "message" is an instance of email.message.EmailMessage()).
list_address = '{}@{}'.format(repository_name, SERVER_DOMAIN)
for header in ['Sender', 'Reply-To', 'Return-Path', 'List-Archive', 'List-Id',
               'List-Subscribe', 'List-Unsubscribe', 'List-Post']:
    del message[header]
message['Sender'] = list_address
message['Reply-To'] = list_address
# TODO if an email is bounced to this address, it should be removed from the address list
message['Return-Path'] = 'bounces@{}'.format(SERVER_DOMAIN)
message['List-Archive'] = ''
message['List-Id'] = '<{}>'.format(list_address)
# message['List-Subscribe'] = '<>'
# message['List-Unsubscribe'] = '<>'
message['List-Post'] = '<{}>'.format(list_address)
# Send emails
if not participants:
    # sendmail() raises when given no recipients, so there is nothing to do.
    logging.debug("No recipients for email {}".format(email_subject))
else:
    try:
        # The "with" block closes the SMTP connection even if sending fails.
        with smtplib.SMTP('localhost') as smtp_client:
            # From https://docs.python.org/3/library/smtplib.html#smtplib.SMTP.sendmail:
            # The from_addr and to_addrs parameters are used to construct the message
            # envelope used by the transport agents. sendmail does not modify the message
            # headers in any way.
            # The message is passed as bytes: sendmail encodes a str message
            # with the ASCII codec, which fails for non-ASCII emails.
            smtp_client.sendmail(
                list_address,         # Envelope From
                participants,         # Envelope To
                message.as_bytes())   # Message
        logging.debug("Sent email {} to {}".format(email_subject, participants))
    except Exception as e:
        logging.debug("Cannot send email {} to {}".format(email_subject, participants))
        logging.error(str(e))