#!/home/git/clif/venv/bin/python
###############################################################################
# This script is called by Postfix every time it receives an email.
# This script will accept incoming emails and add them to the mailing lists repositories.
###############################################################################
import datetime
import email
import email.policy
import hashlib
import logging
import os
import pygit2
import smtplib
import sys
###############################################################################
# SETTINGS
###############################################################################
# The "domain" part in address@domain that we're expecting to see.
# All emails addressed to another domain will be ignored.
SERVER_DOMAIN = 'domain.local'

# Directory that is joined with "<repository>.mlist.git" to locate a mailing
# list repository.
REPOSITORIES_PATH = '/home/git/repositories'

# Root logger configuration. Every message is written to the log file because
# the level is NOTSET. For reference, the standard levels are:
#
#   Level    | Numeric value
#   ---------|--------------
#   CRITICAL | 50
#   ERROR    | 40
#   WARNING  | 30
#   INFO     | 20
#   DEBUG    | 10
#   NOTSET   | 0
logging.basicConfig(
    level=logging.NOTSET,
    filename='/home/git/clif/emails.log',
    datefmt='%Y-%m-%d %H:%M:%S%z',
    format='[%(asctime)s] %(levelname)s - %(message)s',
)
###############################################################################
# ACCEPT/VALIDATE INCOMING EMAIL
###############################################################################
# Retrieve the email message from stdin (Postfix has piped this script).
# The message is read as bytes and parsed with message_from_bytes(): an email
# is a byte stream, and decoding it in text mode can raise on 8-bit content
# before any of the validation below gets a chance to run.
message_raw = sys.stdin.buffer.read()
message = email.message_from_bytes(message_raw, policy=email.policy.default)

# NOTE: headers come from an untrusted sender. Accessing them triggers parsing,
# so the lookups below are wrapped in deliberately broad "except Exception"
# handlers (never a bare "except:", which would also swallow SystemExit).

# The subject is parsed first because the rejection log lines below include it.
try:
    email_subject = message.get('subject').strip()
except Exception:
    email_subject = ''

# A Message-ID is mandatory: its hash identifies the email inside the repository.
try:
    email_id = message.get('message-id').strip()
except Exception:
    email_id = ''

if not email_id:
    logging.error('Refuting email without a Message-ID: {}'.format(email_subject))
    sys.exit()

# This will be used as thread ID
email_id_hash = hashlib.sha256(email_id.encode('utf-8')).hexdigest()[:8]

# Sender: a (realname, address) tuple. The address part must not be empty.
try:
    email_from = email.utils.parseaddr(message.get('from'))
except Exception:
    email_from = ('', '')

if not email_from[1]:
    logging.error('Refuting email with From header: {}'.format(email_from))
    sys.exit()

# Recipient: a (realname, address) tuple. The address must belong to our domain;
# an empty address can never match the "@domain" suffix, so it is rejected too.
try:
    email_to = email.utils.parseaddr(message.get('to'))
except Exception:
    email_to = ('', '')

if not email_to[1].endswith('@' + SERVER_DOMAIN):
    logging.error('Refuting email with To header: {}'.format(email_to))
    sys.exit()

# Optional: the Message-ID of the email that this one replies to
email_in_reply_to = message.get('in-reply-to')
if email_in_reply_to:
    email_in_reply_to = email_in_reply_to.strip()

try:
    # Accept plaintext only!
    email_body = message.get_body(('plain',)).get_content()
except Exception:
    # No text/plain part, or its content could not be decoded
    email_body = ''

logging.info('Received email from {} to {} with subject "{}"'.format(email_from, email_to, email_subject))
# Get the repository name. We use email addresses formatted as <repository>@SERVER_DOMAIN
repository_name = email_to[1].rsplit('@', 1)[0]

# Is this a request for subscription? Commands are given as a suffix of the
# local part: <repository>+subscribe@... or <repository>+unsubscribe@...
request_subscribe = repository_name.endswith('+subscribe')
request_unsubscribe = repository_name.endswith('+unsubscribe')

# Remove the trailing "+command" suffix from the address
if request_subscribe:
    repository_name = repository_name[:-len('+subscribe')]
elif request_unsubscribe:
    repository_name = repository_name[:-len('+unsubscribe')]

# The repository name comes from an untrusted email address: validate it
# *before* building a filesystem path out of it.
if '..' in repository_name:
    logging.error('Refuting email because the repository name contains "..": {}'.format(repository_name))
    sys.exit()

# os.path.join() discards REPOSITORIES_PATH when the second component is
# absolute, which would let the sender point us at an arbitrary directory.
if os.path.isabs(repository_name):
    logging.error('Refuting email because the repository name is an absolute path: {}'.format(repository_name))
    sys.exit()

# All repositories should be <username>/<reponame>
if '/' not in repository_name:
    logging.error('Refuting email because the repository name does not contain a namespace: {}'.format(repository_name))
    sys.exit()

repository_path = os.path.join(REPOSITORIES_PATH, repository_name + '.mlist.git')

if not os.path.isdir(repository_path):
    logging.error('Repository path does not exist: {}'.format(repository_path))
    sys.exit()

try:
    repo = pygit2.Repository(repository_path)
except Exception:
    logging.error('Not a valid repository: {}'.format(repository_path))
    sys.exit()

try:
    head_tree = repo.revparse_single('HEAD').tree
except Exception:
    logging.error('Could not find HEAD ref: {}'.format(repository_path))
    sys.exit()

# The list of subscribers is stored in the "subscribers" blob at the root of
# the HEAD tree, one address per line. A missing or unreadable file is not an
# error: the list simply has no subscribers.
try:
    subscribers_text = head_tree['subscribers'].data.decode('UTF-8')
except Exception:
    subscribers = []
    logging.info('Subscribers file not found in {}'.format(repository_path))
else:
    subscribers = [line.strip() for line in subscribers_text.splitlines() if line.strip()]
###############################################################################
# LISTS SUBSCRIPTION
###############################################################################
if request_subscribe or request_unsubscribe:
    sender_address = email_from[1]
    already_subscribed = sender_address in subscribers

    # Nothing to do: the sender is already on the list
    if request_subscribe and already_subscribed:
        exit()

    # Nothing to do: there is no address to remove
    if request_unsubscribe and not already_subscribed:
        exit()

    if request_subscribe:
        subscribers.append(sender_address)
        commit_message = 'Subscribe'
    else:
        subscribers = [address for address in subscribers if address != sender_address]
        commit_message = 'Unsubscribe'

    # Store the updated list (one address per line) as a new BLOB in the git store
    subscribers_content = '\n'.join(subscribers).encode('UTF-8')
    subscribers_oid = repo.create_blob(subscribers_content)

    # Build a new root tree: the HEAD tree with the "subscribers" entry
    # pointing at the blob that we've just created
    root_builder = repo.TreeBuilder(head_tree)
    root_builder.insert('subscribers', subscribers_oid, pygit2.GIT_FILEMODE_BLOB)
    root_oid = root_builder.write()

    # Commit the new tree on top of the current HEAD
    author = pygit2.Signature('CLIF', '-')
    committer = pygit2.Signature('CLIF', '-')
    repo.create_commit(
        repo.head.name,        # reference name
        author,                # author
        committer,             # committer
        commit_message,        # message
        root_oid,              # tree of this commit
        [repo.head.target],    # parents commit
    )

    # Subscription requests are never stored or forwarded
    exit()
###############################################################################
# ADD EMAIL TO USER REPOSITORY
###############################################################################
# Only emails with a subject and a non-empty plaintext body are stored
if not email_subject:
    logging.info('Refuting email with no subject: {}'.format(email_id))
    exit()

if not email_body:
    logging.warning('Refuting email without plaintext body: {}'.format(email_subject))
    exit()

if not email_body.strip():
    logging.info('Refuting email with empty body: {}'.format(email_id))
    exit()

logging.debug('Accepting email from {} to {} with subject {}'.format(email_from, email_to, email_subject))

# At this point we need to add the incoming email to the repository.
# If the email is a reply (ie. it contains the In-Reply-To header), we retrieve
# the existing tree for the thread. Otherwise, we will create a new tree whose
# name is "<UTC timestamp> <message hash> <subject>".
received_at = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
# "/" is replaced in the subject before it becomes part of the tree entry name
subject_for_title = email_subject.replace('/', '-')

thread_tree = None
thread_title = '{} {} {}'.format(received_at, email_id_hash, subject_for_title)

if email_in_reply_to:
    # The hash of the email that is being replied to
    parent_message_hash = hashlib.sha256(email_in_reply_to.encode('utf-8')).hexdigest()[:8]
    parent_entry_name = parent_message_hash + '.email'

    # Find the first thread (tree) containing the parent message
    thread_tree = next(
        (entry for entry in head_tree
         if entry.type_str == 'tree' and parent_entry_name in entry),
        None,
    )

    # We only accept emails in reply to existing messages
    if not thread_tree:
        logging.debug('In-Reply-To ID not found in repository: {}'.format(email_in_reply_to))
        exit()

    # Replies are filed under the existing thread name
    thread_title = thread_tree.name

# Add the new email BLOB to the git store
email_blob_oid = repo.create_blob(message_raw)

# Add the blob that we've just created to the thread tree (an existing thread
# is extended, a new thread starts from an empty tree)
if thread_tree:
    thread_builder = repo.TreeBuilder(thread_tree)
else:
    thread_builder = repo.TreeBuilder()
thread_builder.insert(email_id_hash + '.email', email_blob_oid, pygit2.GIT_FILEMODE_BLOB)
thread_tree_oid = thread_builder.write()

# Add the thread tree to the HEAD tree
root_builder = repo.TreeBuilder(head_tree)
root_builder.insert(thread_title, thread_tree_oid, pygit2.GIT_FILEMODE_TREE)
root_oid = root_builder.write()

# Commit the new tree on top of the current HEAD
author = pygit2.Signature('CLIF', '-')
committer = pygit2.Signature('CLIF', '-')
repo.create_commit(
    repo.head.name,        # reference name
    author,                # author
    committer,             # committer
    'New email.',          # message
    root_oid,              # tree of this commit
    [repo.head.target],    # parents commit
)
###############################################################################
# FORWARD EMAIL TO THREAD PARTICIPANTS AND TO LIST SUBSCRIBERS
###############################################################################
# Recipients are the list subscribers plus everyone that has sent an email to
# the thread. A set is used so that duplicates are removed as we go.
recipients = set(subscribers)

# Find all the participants in the thread, ie. everyone that has sent an email
thread_tree = repo.get(thread_tree_oid)
for obj in thread_tree:
    try:
        # Stored emails are parsed from their raw bytes, so a message that is
        # not valid UTF-8 can still contribute its sender.
        obj_message = email.message_from_bytes(obj.data, policy=email.policy.default)
        obj_email_from = email.utils.parseaddr(obj_message.get('from'))[1]
    except Exception:
        logging.warning('Could not parse file for searching participants: {}'.format(obj.name))
        continue
    recipients.add(obj_email_from)

# parseaddr() yields an empty address when a From header is missing or
# unparsable: that is not a valid envelope recipient.
recipients.discard('')
# Remove list address from participants in order to avoid forwarding to us
recipients.discard(email_to[1])

# sendmail() wants a list; sorting keeps the recipient order deterministic
participants = sorted(recipients)

# Modify some headers before forwarding.
# Note: we need to delete them first because the SMTP client will only accept one
# of them at most, but message[] is an append operator ("message" is an instance of
# email.message.EmailMessage())
# https://docs.python.org/3/library/email.message.html#email.message.EmailMessage
# TODO Some ISPs add the client IP to the email headers. Should we remove *all*
#      unnecessary headers instead?
list_address = '{}@{}'.format(repository_name, SERVER_DOMAIN)

for header in ('Sender', 'Reply-To',
               'List-Id', 'List-Subscribe', 'List-Unsubscribe', 'List-Post'):
    del message[header]

message['Sender'] = list_address
message['Reply-To'] = list_address
message['List-Id'] = '<{}>'.format(list_address)
# message['List-Subscribe'] = '<>'
# message['List-Unsubscribe'] = '<>'
message['List-Post'] = '<{}>'.format(list_address)

# Forward email to participants
if not participants:
    # sendmail() raises when it is given no recipients; this is not an error
    logging.debug('No participants to forward the email to.')
else:
    try:
        # The context manager issues QUIT and closes the connection even when
        # sending fails.
        with smtplib.SMTP('localhost') as smtp_client:
            # "The from_addr and to_addrs parameters are used to construct the message envelope
            # used by the transport agents. sendmail does not modify the message headers in any way."
            # - https://docs.python.org/3/library/smtplib.html#smtplib.SMTP.sendmail
            #
            # The message is serialized with as_bytes(): sendmail() encodes a str
            # message with the ASCII codec, while str(message) may contain raw
            # non-ASCII header text and would make that encoding fail.
            smtp_client.sendmail(
                list_address,        # Envelope From
                participants,        # Envelope To
                message.as_bytes())  # Message
        logging.debug("Successfully sent emails.")
    except Exception as e:
        # Forwarding is best-effort: the email has already been committed
        logging.error("Error sending emails.")
        logging.error(str(e))