Sending a properly encoded email that contains non-ASCII characters is not as trivial as it should be. Here's more or less what I want:
# U+263A and U+263B are smiley faces (☺ and ☻) sender = u'Sender \u263A <sender@example.com>' recipient = u'Recipient \u263B <recipient@example.com>' subject = u'Smile! \u263A' body = u'Smile!\n\u263B' send_email(sender, recipient, subject, body)
The hard part is getting all the Unicode strings to be properly encoded in the email. Details like multiple recipients, additional headers, attachments, SMTP configuration and error handling are ignored for the purposes of this article.
Here's the solution:
from smtplib import SMTP
from email.MIMEText import MIMEText
from email.Header import Header
from email.Utils import parseaddr, formataddr


def send_email(sender, recipient, subject, body):
    """Send an email.

    All arguments should be Unicode strings (plain ASCII works as well).

    Only the real name part of sender and recipient addresses may contain
    non-ASCII characters.

    The email will be properly MIME encoded and delivered through SMTP to
    localhost port 25.  This is easy to change if you want something
    different.

    The charset of the email will be the first one out of US-ASCII,
    ISO-8859-1 and UTF-8 that can represent all the characters occurring
    in the email.

    Raises UnicodeError if the address part of the sender or the recipient
    contains non-ASCII characters.  Exceptions raised by smtplib while
    connecting or sending are not caught here.
    """

    # Header class is smart enough to try US-ASCII, then the charset we
    # provide, then fall back to UTF-8.
    header_charset = 'ISO-8859-1'

    # We must choose the body charset manually: keep the first candidate
    # that can encode the whole body.
    for body_charset in 'US-ASCII', 'ISO-8859-1', 'UTF-8':
        try:
            body.encode(body_charset)
        except UnicodeError:
            pass
        else:
            break

    # Split real name (which is optional) and email address parts
    sender_name, sender_addr = parseaddr(sender)
    recipient_name, recipient_addr = parseaddr(recipient)

    # We must always pass Unicode strings to Header, otherwise it will
    # use RFC 2047 encoding even on plain ASCII strings.
    sender_name = str(Header(unicode(sender_name), header_charset))
    recipient_name = str(Header(unicode(recipient_name), header_charset))

    # Make sure email addresses do not contain non-ASCII characters
    sender_addr = sender_addr.encode('ascii')
    recipient_addr = recipient_addr.encode('ascii')

    # Create the message ('plain' stands for Content-Type: text/plain)
    msg = MIMEText(body.encode(body_charset), 'plain', body_charset)
    msg['From'] = formataddr((sender_name, sender_addr))
    msg['To'] = formataddr((recipient_name, recipient_addr))
    msg['Subject'] = Header(unicode(subject), header_charset)

    # Send the message via SMTP to localhost:25.  The envelope uses the
    # bare, ASCII-validated addresses rather than the original display
    # strings, so no non-ASCII real name ever reaches the SMTP commands.
    smtp = SMTP("localhost")
    try:
        smtp.sendmail(sender_addr, [recipient_addr], msg.as_string())
    finally:
        # Always end the session, even when sendmail() raises.
        smtp.quit()
I wish I could write it like this:
from smtplib import SMTP
from email.MIMEText import MIMEText


def send_email(sender, recipient, subject, body):
    """Science-fictional simple version of send_email.

    Illustrative sketch only: the Unicode arguments are handed to MIMEText
    and to the message headers unchanged, which the inline comments below
    mark as not working.
    """

    # The email module should be able to deal with Unicode message bodies and
    # headers and pick an appropriate charset automatically. Today (on Python
    # 2.3) it just bombs out with a Unicode error when as_string() is called.
    msg = MIMEText(body)  # won't work
    msg['From'] = sender  # won't work
    msg['To'] = recipient  # won't work
    msg['Subject'] = subject  # won't work

    # At least the SMTP module is smart enough to discard the real name part
    # that it doesn't need
    smtp = SMTP("localhost")
    smtp.sendmail(sender, recipient, msg.as_string())
    smtp.quit()