1. Goal

Use AWS SES to receive emails and save attachments to an AWS S3 bucket.

2. Steps

2.1. Create an identity for a domain or subdomain

You can't use an existing email address here, because an existing address is already managed by some other email service.

This domain/subdomain identity acts as the domain for a new email address that will be managed by AWS SES.

20231005-150701_screenshot.jpeg

Because my domain is managed by Route53, I only need to fill in the domain I want to use and continue. Wait a couple of minutes and it will automatically verify the DNS settings.

20231005-150806_screenshot.jpeg
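
If you prefer to script this step, a minimal boto3 sketch might look like the following; mail.example.com is a placeholder for your own domain/subdomain:

import boto3

# Hypothetical sketch: create the SES domain identity programmatically
sesv2 = boto3.client("sesv2", region_name="ap-southeast-2")
response = sesv2.create_email_identity(EmailIdentity="mail.example.com")
print(response["IdentityType"])              # expected: "DOMAIN"
print(response["DkimAttributes"]["Status"])  # verification status of the DKIM records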

2.2. Publishing an MX record for Amazon SES email receiving

Get the SES endpoint for receiving email. In my case it's inbound-smtp.ap-southeast-2.amazonaws.com.

Create an MX record. The record name should be your domain/subdomain, and the value should be the SES inbound endpoint with a priority, e.g. 10 inbound-smtp.ap-southeast-2.amazonaws.com.

20231005-151333_screenshot.jpeg
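
If you'd rather script the DNS change (and your zone is in Route53), something like this boto3 sketch should work; HOSTED_ZONE_ID and mail.example.com are placeholders, and 10 is just the MX priority:

import boto3

# Hypothetical sketch: UPSERT the MX record pointing at the SES inbound endpoint
route53 = boto3.client("route53")
route53.change_resource_record_sets(
    HostedZoneId="HOSTED_ZONE_ID",
    ChangeBatch={
        "Changes": [
            {
                "Action": "UPSERT",
                "ResourceRecordSet": {
                    "Name": "mail.example.com",
                    "Type": "MX",
                    "TTL": 300,
                    "ResourceRecords": [
                        {"Value": "10 inbound-smtp.ap-southeast-2.amazonaws.com"}
                    ],
                },
            }
        ]
    },
)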

2.3. Giving permissions to Amazon SES for email receiving

Attach the following bucket policy to the S3 bucket that SES will drop the emails into. Replace bucket-name, account-number, and the receipt rule ARN with your own values.

{
  "Version":"2012-10-17",
  "Statement":[
    {
      "Sid":"AllowSESPuts",
      "Effect":"Allow",
      "Principal":{
        "Service":"ses.amazonaws.com"
      },
      "Action":"s3:PutObject",
      "Resource":"arn:aws:s3:::bucket-name/*",
      "Condition":{
        "StringEquals":{
          "AWS:SourceAccount":"account-number",
          "AWS:SourceArn": "arn:aws:ses:ap-southeast-2:account-number:receipt-rule-set/tax-invoices:receipt-rule/forward-attachment"
        }
      }
    }
  ]
}
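
If you want to apply the policy from code instead of the console, here is a rough boto3 sketch; it assumes the JSON above is saved locally as ses-s3-policy.json and that bucket-name is your bucket:

import boto3

# Hypothetical sketch: attach the bucket policy shown above
s3 = boto3.client("s3")
with open("ses-s3-policy.json") as f:
    s3.put_bucket_policy(Bucket="bucket-name", Policy=f.read())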

2.4. Add a Lambda function to process the attachments

Sample S3 event that triggers the function (SES writes the raw email to the bucket, and the resulting ObjectCreated notification invokes the Lambda):

{"Records": [{"eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "ap-southeast-2", "eventTime": "2023-10-05T04:57:40.171Z", "eventName": "ObjectCreated:Put", "userIdentity": {"principalId": "AWS:ZXC2IU3VLCK5RY5:a41bc866759cc95c7330a"}, "requestParameters": {"sourceIPAddress": "101.0.4.6"}, "responseElements": {"x-amz-request-id": "6TS8WGRDNT4T3P5T", "x-amz-id-2": "4RuZDptjBdMxsC3"}, "s3": {"s3SchemaVersion": "1.0", "configurationId": "7b22853", "bucket": {"name": "bucket-name", "ownerIdentity": {"principalId": "8Z"}, "arn": "arn:aws:s3:::bucket-name"}, "object": {"key": "email/tax/82ghepkvblmfnh81", "size": "285170", "eTag": "cb607b6f486ae754444e", "sequencer": "41F793986"}}}]}
The Lambda function:

from email.message import MIMEPart
import email
import email.utils
import os
import urllib.parse

import boto3
print('Loading function')

s3 = boto3.client('s3')
s3r = boto3.resource('s3')
tmp_dir = "/tmp/output/"

output_bucket = "bucket-name"

TAX_INVOICE_MAILBOX = 'tax-stuff@domain'
PRIVATE_INVOICE_MAILBOX = 'private-stuff@domain'


def get_mailbox(msg):
    mailbox = email.utils.parseaddr(msg.get("To"))[1]
    return mailbox


def extract_attachment(attachment, attachment_name):
    file_path = tmp_dir + attachment_name
    with open(file_path, 'wb') as f:
        f.write(attachment.get_payload(decode=True))

    return file_path


def upload_resulting_files_to_s3(
    mailbox, file_path, msg_date, file_name_custom_prefix=""
):
    # Upload the extracted file to the output bucket under a mailbox/date-based prefix
    print("Uploading: " + file_path)
    file_name = file_path.split('/')[-1]
    if len(file_name_custom_prefix) > 0:
        file_name = f"{file_name_custom_prefix}-{file_name}"

    output_prefix = f"invoice"
    if mailbox == TAX_INVOICE_MAILBOX:
        output_prefix = f'{output_prefix}/tax'
    else:
        output_prefix = f'{output_prefix}/private'
    output_prefix = f"{output_prefix}/{str(msg_date.year)}/{str(msg_date.month)}"
    s3r.meta.client.upload_file(
        file_path, output_bucket, f"{output_prefix}/{file_name}"
    )


def get_attachment_name(msg):
    # Fallback: walk the whole message and return the last attachment/inline filename found
    attachment_name = "default_name"
    if msg.is_multipart():
        for part in msg.walk():
            content_disposition = part.get("Content-Disposition", "")
            if content_disposition.startswith(("attachment", "inline")):
                attachment_name = part.get_filename() or attachment_name
    return attachment_name


# Delete the file in the current bucket
def delete_file(key, bucket):
    s3.delete_object(Bucket=bucket, Key=key)
    print("%s deleted fom %s ") % (key, bucket)


def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'])
    print(f"bucket: {bucket}")

    try:
        response = s3r.Bucket(bucket).Object(key)

        # Read the raw text file into a Email Object
        raw_msg = response.get()["Body"].read().decode("utf-8")
        mime_msg = email.message_from_string(raw_msg, _class=MIMEPart)
        plain_msg = email.message_from_string(raw_msg)

        file_name_custom_prefix = ""
        for p in plain_msg.walk():
            if p.get_content_type() == "text/plain":
                if "!!!" in p.get_payload():
                    file_name_custom_prefix = (
                        p.get_payload().split("\\n")[0].split("\n")[0].strip()
                    )
                    file_name_custom_prefix = file_name_custom_prefix.replace("!", "")
                print(f"prefix: {file_name_custom_prefix}")

        msg_date = email.utils.parsedate_to_datetime(mime_msg.get("Date"))
        print("Message Date:", msg_date)

        # Create the /tmp/output directory for extracted attachments (makes debugging easier)
        if not os.path.isdir(tmp_dir):
            os.mkdir(tmp_dir)

        for attachment in mime_msg.iter_attachments():
            print(attachment.get_content_type())
            # Use the attachment's own filename so multiple attachments don't overwrite each other
            attachment_name = attachment.get_filename() or get_attachment_name(mime_msg)
            # Extract the attachment into /tmp/output
            file_path = extract_attachment(attachment, attachment_name)

            # Upload the extracted file to S3
            upload_resulting_files_to_s3(
                get_mailbox(mime_msg),
                file_path,
                msg_date,
                file_name_custom_prefix,
            )

        return 0
    except Exception as e:
        print(e)
        print(
            'Error getting object {} from bucket {}. Make sure they exist '
            'and your bucket is in the same region as this '
            'function.'.format(key, bucket)
        )
        raise e


#    delete_file(key, bucket)
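
To sanity-check the handler before wiring up the S3 trigger, you can invoke it locally with the sample event. This is just a rough sketch; it assumes the event JSON above is saved as sample_event.json and that your local AWS credentials can read the source bucket:

import json

if __name__ == "__main__":
    # Load the sample S3 event shown earlier and call the handler directly
    with open("sample_event.json") as f:
        event = json.load(f)
    lambda_handler(event, None)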

2.5. Remember to verify DKIM

As of January 2024, the steps above didn't work for one of my email-receiving domains because I had moved that domain's name servers to Cloudflare.

I was receiving this error:

550 5.1.1 Requested action not taken: mailbox unavailable

Basically it means the recipient mailbox couldn't be found, i.e. the receiving side rejected the address.

Eventually I figured out that the SES email-receiving domain needed DKIM signing/verification.

I grabbed the required records from the SES console, added them in Cloudflare, and it's all good.

20240130-214256_screenshot.jpeg
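
If you want to pull the DKIM records programmatically instead of copying them from the console, a small boto3 sketch like this should work; mail.example.com is a placeholder, and the three tokens become CNAME records in Cloudflare:

import boto3

# Hypothetical sketch: list the DKIM CNAMEs SES expects for the receiving domain
sesv2 = boto3.client("sesv2", region_name="ap-southeast-2")
identity = sesv2.get_email_identity(EmailIdentity="mail.example.com")
for token in identity["DkimAttributes"]["Tokens"]:
    print(f"{token}._domainkey.mail.example.com  CNAME  {token}.dkim.amazonses.com")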