diff --git a/README_FILES/README_UNICODE b/README_FILES/README_UNICODE new file mode 100644 index 0000000..77d8d1a --- /dev/null +++ b/README_FILES/README_UNICODE @@ -0,0 +1,31 @@ +Unicode email addresses + +RFCs 6530-6533 add support for unicode email addresses; this version of +Postfix supports that. The support was paid for by cnnic.cn and the +code written by an RFC contributor. + +The overall design is one of allowing UTF-8 in the message and in MAIL +FROM/RCPT TO commands. The changes are subtle enough that procmail +didn't need any changes to work with unicode addresses, even though +most of the code is over 20 years old. + +The xn-- domains you may have seen are not visible in SMTP or email +messages. Postfix converts blåbærsyltetøy to xn--blbrsyltety-y8ao3x +while doing the MX lookup. + +Postfix supports unicode addresses if it's built with ICU (which is +autodetected at build time), and advertises that using the SMTPUTF8 +extension. Postfix will use unicode while talking to another server +only if the message or addresses need it. If it's possible to use +classic SMTP, Postfix does that. + +The variable myhostname should be a plain ASCII hostname. Even if you +intend to send only unicode mail, Postfix needs to use the name in the +SMTP EHLO command, and that command does not accept UTF-8. And if you +intend to send ASCII mail, you certainly need an ASCII hostname. + +The variables myorigin and mydestination can contain UTF-8, as can +localparts. + +You can test that Postfix' unicode support works by sending mail to +the autoresponder jøran@blåbærsyltetøy.gulbrandsen.priv.no. diff --git a/makedefs b/makedefs index 5f3bcc7..e8bf924 100644 --- a/makedefs +++ b/makedefs @@ -576,6 +576,44 @@ EOF rm -f makedefs.test makedefs.test.[co] esac + +# +# Look for ICU and enable unicode email if available. This tests +# a different function than Postfix uses in order to avoid having UTF8 +# in this file. 
The two functions use the same data structures, so they +# should be equivalent for testing purposes. +# +case "$CCARGS" in + *-DNO_EAI*) ;; + *) trap 'rm -f makedefs.test makedefs.test.[co]' 1 2 3 15 + cat >makedefs.test.c <<'EOF' +#include <unicode/uidna.h> +#include <stdlib.h> + +int main(int argc, char **argv) +{ + char buf[1024]; + UErrorCode error = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + UIDNA * idna = uidna_openUTS46(UIDNA_DEFAULT, &error); + exit(14 != uidna_labelToUnicodeUTF8(idna, + "xn--lgbbat1ad8j", /* an arabic TLD */ + 15, + buf, + 1024, + &info, + &error)); +} +EOF + ${CC-gcc} -o makedefs.test makedefs.test.c -licuuc >/dev/null 2>&1 + if ./makedefs.test 2>/dev/null ; then + SYSLIBS="$SYSLIBS -licuuc" + else + CCARGS="$CCARGS -DNO_EAI" + fi + rm -f makedefs.test makedefs.test.[co] +esac + # # OpenSSL has no configuration query utility, but we don't try to # guess. We assume includes in /usr/include/openssl and libraries in diff --git a/src/bounce/bounce_notify_util.c b/src/bounce/bounce_notify_util.c index 9cdf264..f1eb2ff 100644 --- a/src/bounce/bounce_notify_util.c +++ b/src/bounce/bounce_notify_util.c @@ -251,6 +251,7 @@ static BOUNCE_INFO *bounce_mail_alloc(const char *service, bounce_info->rcpt_buf = rcpt_buf; bounce_info->dsn_buf = dsn_buf; bounce_info->log_handle = log_handle; + bounce_info->smtputf8 = 0; /* * RFC 1894: diagnostic-type is an RFC 822 atom. We use X-$mail_name and @@ -332,6 +333,19 @@ static BOUNCE_INFO *bounce_mail_alloc(const char *service, STR(bounce_info->buf) : mail_addr_mail_daemon(), 0); } + + /* + * Information for RFC 6533 Content-Type choice. + */ + else if (rec_type == REC_TYPE_ATTR) { + char *attr_name; + char *attr_value; + + if (split_nameval(STR(bounce_info->buf), + &attr_name, &attr_value) == 0 && + rec_attr_map(attr_name) == REC_TYPE_SMTPUTF8) + bounce_info->smtputf8 = 1; + } /* * Backwards compatibility: no data offset in SIZE record. 
@@ -344,10 +358,6 @@ static BOUNCE_INFO *bounce_mail_alloc(const char *service, bounce_info->orig_offs = vstream_ftell(bounce_info->orig_fp); break; } - if (bounce_info->orig_offs > 0 - && bounce_info->arrival_time > 0 - && VSTRING_LEN(bounce_info->sender) > 0) - break; } } return (bounce_info); @@ -602,7 +612,8 @@ int bounce_header_dsn(VSTREAM *bounce, BOUNCE_INFO *bounce_info) post_mail_fprintf(bounce, "--%s", bounce_info->mime_boundary); post_mail_fprintf(bounce, "Content-Description: %s", "Delivery report"); - post_mail_fprintf(bounce, "Content-Type: %s", "message/delivery-status"); + post_mail_fprintf(bounce, "Content-Type: message/%sdelivery-status", + bounce_info->smtputf8 ? "global-" : ""); /* * According to RFC 1894: The body of a message/delivery-status consists @@ -638,7 +649,9 @@ int bounce_recipient_dsn(VSTREAM *bounce, BOUNCE_INFO *bounce_info) DSN *dsn = &bounce_info->dsn_buf->dsn; post_mail_fputs(bounce, ""); - post_mail_fprintf(bounce, "Final-Recipient: rfc822; %s", rcpt->address); + post_mail_fprintf(bounce, "Final-Recipient: %s; %s", + uses_utf_8(rcpt->address) ? "utf-8" : "rfc822", + rcpt->address); /* * XXX DSN @@ -662,7 +675,8 @@ int bounce_recipient_dsn(VSTREAM *bounce, BOUNCE_INFO *bounce_info) if (NON_NULL_EMPTY(rcpt->dsn_orcpt)) { post_mail_fprintf(bounce, "Original-Recipient: %s", rcpt->dsn_orcpt); } else if (NON_NULL_EMPTY(rcpt->orig_addr)) { - post_mail_fprintf(bounce, "Original-Recipient: rfc822; %s", + post_mail_fprintf(bounce, "Original-Recipient: %s; %s", + uses_utf_8(rcpt->orig_addr) ? "utf-8" : "rfc822", rcpt->orig_addr); } post_mail_fprintf(bounce, "Action: %s", @@ -755,9 +769,14 @@ int bounce_original(VSTREAM *bounce, BOUNCE_INFO *bounce_info, "Undelivered " : "", headers_only == DSN_RET_HDRS ? "Message Headers" : "Message"); - post_mail_fprintf(bounce, "Content-Type: %s", - headers_only == DSN_RET_HDRS ? 
- "text/rfc822-headers" : "message/rfc822"); + if (bounce_info->smtputf8) + post_mail_fprintf(bounce, "Content-Type: message/%s", + headers_only == DSN_RET_HDRS ? + "global-headers" : "global"); + else + post_mail_fprintf(bounce, "Content-Type: %s", + headers_only == DSN_RET_HDRS ? + "text/rfc822-headers" : "message/rfc822"); if (bounce_info->mime_encoding) post_mail_fprintf(bounce, "Content-Transfer-Encoding: %s", bounce_info->mime_encoding); diff --git a/src/bounce/bounce_service.h b/src/bounce/bounce_service.h index 0a9bc4d..f40e206 100644 --- a/src/bounce/bounce_service.h +++ b/src/bounce/bounce_service.h @@ -84,6 +84,7 @@ typedef struct { DSN_BUF *dsn_buf; /* delivery status info */ BOUNCE_LOG *log_handle; /* open logfile */ char *mail_name; /* $mail_name, cooked */ + int smtputf8; /* 0 for RFC 1894, 1 for RFC 6533 */ } BOUNCE_INFO; /* */ diff --git a/src/cleanup/cleanup_milter.c b/src/cleanup/cleanup_milter.c index db38c91..a75f213 100644 --- a/src/cleanup/cleanup_milter.c +++ b/src/cleanup/cleanup_milter.c @@ -107,6 +107,7 @@ #include #include #include +#include /* Application-specific. 
*/ @@ -1419,18 +1420,24 @@ static const char *cleanup_add_rcpt_par(void *context, const char *ext_rcpt, msg_warn("%s: Bad NOTIFY parameter from MILTER: \"%.100s\"", state->queue_id, arg); } else if (strncasecmp(arg, "ORCPT=", 6) == 0) { /* RFC 3461 */ + /* + * RFC 6533: an ORCPT address of type "utf-8" is quoted with + * \x{...} sequences (uxtext); every other address type uses + * RFC 3461 xtext. + */ if (state->milter_orcpt_buf == 0) state->milter_orcpt_buf = vstring_alloc(100); if (dsn_orcpt_info || (type_len = strcspn(arg_val = arg + 6, ";")) == 0 || (arg_val)[type_len] != ';' - || xtext_unquote_append( + || ((type_len == 5 && strncasecmp(arg_val, "utf-8", 5) == 0) ? + uxtext_unquote_append : xtext_unquote_append) ( vstring_sprintf(state->milter_orcpt_buf, "%.*s;", (int) type_len, arg_val), arg_val + type_len + 1) == 0) { msg_warn("%s: Bad ORCPT parameter from MILTER: \"%.100s\"", state->queue_id, arg); } else { dsn_orcpt_info = STR(state->milter_orcpt_buf); } diff --git a/src/global/Makefile.in b/src/global/Makefile.in index f5799eb..f3ebc05 100644 --- a/src/global/Makefile.in +++ b/src/global/Makefile.in @@ -26,7 +26,7 @@ SRCS = abounce.c anvil_clnt.c been_here.c bounce.c bounce_log.c \ sent.c smtp_stream.c split_addr.c string_list.c strip_addr.c \ sys_exits.c timed_ipc.c tok822_find.c tok822_node.c tok822_parse.c \ tok822_resolve.c tok822_rewrite.c tok822_tree.c trace.c \ - user_acl.c valid_mailhost_addr.c verify.c verify_clnt.c \ + user_acl.c uxtext.c valid_mailhost_addr.c verify.c verify_clnt.c \ verp_sender.c wildcard_inet_addr.c xtext.c delivered_hdr.c \ fold_addr.c header_body_checks.c mkmap_proxy.c data_redirect.c \ match_service.c mail_conf_nint.c addr_match_list.c mail_conf_nbool.c \ @@ -60,7 +60,7 @@ OBJS = abounce.o anvil_clnt.o been_here.o bounce.o 
bounce_log.o \ sent.o smtp_stream.o split_addr.o string_list.o strip_addr.o \ sys_exits.o timed_ipc.o tok822_find.o tok822_node.o tok822_parse.o \ tok822_resolve.o tok822_rewrite.o tok822_tree.o trace.o \ - user_acl.o valid_mailhost_addr.o verify.o verify_clnt.o \ + user_acl.o uxtext.o valid_mailhost_addr.o verify.o verify_clnt.o \ verp_sender.o wildcard_inet_addr.o xtext.o delivered_hdr.o \ fold_addr.o header_body_checks.o mkmap_proxy.o data_redirect.o \ match_service.o mail_conf_nint.o addr_match_list.o mail_conf_nbool.o \ @@ -88,7 +88,7 @@ HDRS = abounce.h anvil_clnt.h been_here.h bounce.h bounce_log.h \ rec_type.h recipient_list.h record.h resolve_clnt.h resolve_local.h \ rewrite_clnt.h scache.h sent.h smtp_stream.h split_addr.h \ string_list.h strip_addr.h sys_exits.h timed_ipc.h tok822.h \ - trace.h user_acl.h valid_mailhost_addr.h verify.h verify_clnt.h \ + trace.h user_acl.h uxtext.h valid_mailhost_addr.h verify.h verify_clnt.h \ verp_sender.h wildcard_inet_addr.h xtext.h delivered_hdr.h \ fold_addr.h header_body_checks.h data_redirect.h match_service.h \ addr_match_list.h smtp_reply_footer.h safe_ultostr.h \ @@ -257,6 +257,11 @@ strip_addr: $(LIB) $(LIBS) $(CC) -DTEST $(CFLAGS) -o $@ $@.c $(LIB) $(LIBS) $(SYSLIBS) mv junk $@.o +uxtext: $(LIB) $(LIBS) + mv $@.o junk + $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(LIBS) $(SYSLIBS) + mv junk $@.o + verify_clnt: $(LIB) $(LIBS) mv $@.o junk $(CC) $(CFLAGS) -DTEST -o $@ $@.c $(LIB) $(LIBS) $(SYSLIBS) diff --git a/src/global/deliver_request.c b/src/global/deliver_request.c index e32e82f..fd60386 100644 --- a/src/global/deliver_request.c +++ b/src/global/deliver_request.c @@ -209,6 +209,7 @@ static int deliver_request_get(VSTREAM *stream, DELIVER_REQUEST *request) static RCPT_BUF *rcpt_buf; int rcpt_count; int dsn_ret; + int smtputf8 = 0; /* * Initialize. 
For some reason I wanted to allow for multiple instances @@ -250,6 +251,7 @@ static int deliver_request_get(VSTREAM *stream, DELIVER_REQUEST *request) ATTR_TYPE_STR, MAIL_ATTR_SENDER, address, ATTR_TYPE_STR, MAIL_ATTR_DSN_ENVID, dsn_envid, ATTR_TYPE_INT, MAIL_ATTR_DSN_RET, &dsn_ret, + ATTR_TYPE_INT, MAIL_ATTR_SMTPUTF8, &smtputf8, ATTR_TYPE_FUNC, msg_stats_scan, (void *) &request->msg_stats, /* XXX Should be encapsulated with ATTR_TYPE_FUNC. */ ATTR_TYPE_STR, MAIL_ATTR_LOG_CLIENT_NAME, client_name, @@ -265,7 +267,7 @@ static int deliver_request_get(VSTREAM *stream, DELIVER_REQUEST *request) ATTR_TYPE_STR, MAIL_ATTR_LOG_IDENT, log_ident, ATTR_TYPE_STR, MAIL_ATTR_RWR_CONTEXT, rewrite_context, ATTR_TYPE_INT, MAIL_ATTR_RCPT_COUNT, &rcpt_count, - ATTR_TYPE_END) != 22) { + ATTR_TYPE_END) != 23) { msg_warn("%s: error receiving common attributes", myname); return (-1); } @@ -294,6 +296,8 @@ static int deliver_request_get(VSTREAM *stream, DELIVER_REQUEST *request) request->rewrite_context = mystrdup(vstring_str(rewrite_context)); request->dsn_envid = mystrdup(vstring_str(dsn_envid)); request->dsn_ret = dsn_ret; + if(smtputf8) + request->flags |= DEL_REQ_FLAG_SMTPUTF8; /* * Extract the recipient offset and address list. Skip over any @@ -363,6 +367,7 @@ static DELIVER_REQUEST *deliver_request_alloc(void) request = (DELIVER_REQUEST *) mymalloc(sizeof(*request)); request->fp = 0; + request->flags = 0; request->queue_name = 0; request->queue_id = 0; request->nexthop = 0; diff --git a/src/global/deliver_request.h b/src/global/deliver_request.h index d2ca771..2abf945 100644 --- a/src/global/deliver_request.h +++ b/src/global/deliver_request.h @@ -73,6 +73,8 @@ typedef struct DELIVER_REQUEST { #define DEL_REQ_FLAG_CONN_LOAD (1<<11) /* Consult opportunistic cache */ #define DEL_REQ_FLAG_CONN_STORE (1<<12) /* Update opportunistic cache */ +#define DEL_REQ_FLAG_SMTPUTF8 (1<<13) /* Delivery requires SMTPUTF8 */ + /* * Cache Load and Store as value or mask. 
Use explicit _MASK for multi-bit * values. diff --git a/src/global/ehlo_mask.c b/src/global/ehlo_mask.c index 9c60dc7..392e9d2 100644 --- a/src/global/ehlo_mask.c +++ b/src/global/ehlo_mask.c @@ -75,6 +75,7 @@ static const NAME_MASK ehlo_mask_table[] = { "STARTTLS", EHLO_MASK_STARTTLS, "ENHANCEDSTATUSCODES", EHLO_MASK_ENHANCEDSTATUSCODES, "DSN", EHLO_MASK_DSN, + "SMTPUTF8", EHLO_MASK_SMTPUTF8, "SILENT-DISCARD", EHLO_MASK_SILENT, /* XXX In-band signaling */ 0, }; diff --git a/src/global/ehlo_mask.h b/src/global/ehlo_mask.h index 62256f1..3ef2a2d 100644 --- a/src/global/ehlo_mask.h +++ b/src/global/ehlo_mask.h @@ -27,6 +27,7 @@ #define EHLO_MASK_XFORWARD (1<<9) #define EHLO_MASK_ENHANCEDSTATUSCODES (1<<10) #define EHLO_MASK_DSN (1<<11) +#define EHLO_MASK_SMTPUTF8 (1<<12) #define EHLO_MASK_SILENT (1<<15) extern int ehlo_mask(const char *); diff --git a/src/global/mail_params.c b/src/global/mail_params.c index f322af6..7859362 100644 --- a/src/global/mail_params.c +++ b/src/global/mail_params.c @@ -687,6 +687,9 @@ void mail_params_init() * Variables whose defaults are determined at runtime. Some sites use * short hostnames in the host table; some sites name their system after * the domain. + * + * Postfix does not allow an IDN domain as hostname, due to + * RFC6531 section 3.7.1. */ get_mail_conf_str_fn_table(function_str_defaults); if (!valid_hostname(var_myhostname, DO_GRIPE)) diff --git a/src/global/mail_proto.h b/src/global/mail_proto.h index 976b125..dc73398 100644 --- a/src/global/mail_proto.h +++ b/src/global/mail_proto.h @@ -246,6 +246,11 @@ extern char *mail_pathname(const char *, const char *); #define MAIL_ATTR_DSN_ORCPT "dsn_orig_rcpt" /* dsn original recipient */ /* + * RFC6531 support. + */ +#define MAIL_ATTR_SMTPUTF8 "smtputf8" /* unicode or ascii/mime */ + + /* * TLSPROXY support. 
*/ #define MAIL_ATTR_REMOTE_ENDPT "remote_endpoint" /* name[addr]:port */ diff --git a/src/global/rec_attr_map.c b/src/global/rec_attr_map.c index e98dde8..3dc9a92 100644 --- a/src/global/rec_attr_map.c +++ b/src/global/rec_attr_map.c @@ -48,6 +48,8 @@ int rec_attr_map(const char *attr_name) return (REC_TYPE_DSN_RET); } else if (strcmp(attr_name, MAIL_ATTR_CREATE_TIME) == 0) { return (REC_TYPE_CTIME); + } else if (strcmp(attr_name, MAIL_ATTR_SMTPUTF8) == 0) { + return (REC_TYPE_SMTPUTF8); } else { return (0); } diff --git a/src/global/rec_type.c b/src/global/rec_type.c index 1ce2111..013f967 100644 --- a/src/global/rec_type.c +++ b/src/global/rec_type.c @@ -71,6 +71,7 @@ REC_TYPE_NAME rec_type_names[] = { REC_TYPE_DSN_ENVID, "dsn_envelope_id", REC_TYPE_DSN_ORCPT, "dsn_original_recipient", REC_TYPE_DSN_NOTIFY, "dsn_notify_flags", + REC_TYPE_SMTPUTF8, "smtputf8", 0, 0, }; diff --git a/src/global/rec_type.h b/src/global/rec_type.h index f6668cd..8e82482 100644 --- a/src/global/rec_type.h +++ b/src/global/rec_type.h @@ -74,6 +74,8 @@ #define REC_TYPE_MILT_COUNT 'm' +#define REC_TYPE_SMTPUTF8 '8' /* message uses UTF8 addresses etc */ + #define REC_TYPE_END 'E' /* terminator, required */ /* diff --git a/src/global/tok822_parse.c b/src/global/tok822_parse.c index 4376802..6ff8fe6 100644 --- a/src/global/tok822_parse.c +++ b/src/global/tok822_parse.c @@ -434,7 +434,7 @@ TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit) /* * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to - * allow for forms such as: Johnny B. Goode . I cannot + * allow for forms such as: Johnny B. Goode . I cannot * handle that at the tokenizer level - it is not context sensitive. And * to fix this at the parser level requires radical changes to preserve * white space as part of the token stream. Thanks a lot, people. 
diff --git a/src/global/uxtext.c b/src/global/uxtext.c
new file mode 100644
index 0000000..9a41de8
--- /dev/null
+++ b/src/global/uxtext.c
@@ -0,0 +1,270 @@
+/*++
+/* NAME
+/*      uxtext 3
+/* SUMMARY
+/*      quote/unquote text, RFC 6533 utf-8-addr-xtext style
+/* SYNOPSIS
+/*      #include <uxtext.h>
+/*
+/*      VSTRING *uxtext_quote(quoted, unquoted, special)
+/*      VSTRING *quoted;
+/*      const char *unquoted;
+/*      const char *special;
+/*
+/*      VSTRING *uxtext_quote_append(quoted, unquoted, special)
+/*      VSTRING *quoted;
+/*      const char *unquoted;
+/*      const char *special;
+/*
+/*      VSTRING *uxtext_unquote(unquoted, quoted)
+/*      VSTRING *unquoted;
+/*      const char *quoted;
+/*
+/*      VSTRING *uxtext_unquote_append(unquoted, quoted)
+/*      VSTRING *unquoted;
+/*      const char *quoted;
+/* DESCRIPTION
+/*      uxtext_quote() takes a null-terminated UTF-8 string and replaces
+/*      the characters \, + and =, characters <33(10) and >126(10), as
+/*      well as characters specified with "special", by \x{XX}, XX being
+/*      the 2-6-digit uppercase hexadecimal Unicode code point. The
+/*      result value is the quoted argument in case of success, a null
+/*      pointer when the input is not well-formed UTF-8.
+/*
+/*      uxtext_quote_append() is like uxtext_quote(), but appends the
+/*      conversion result to the result buffer.
+/*
+/*      uxtext_unquote() performs the opposite transformation. This function
+/*      understands lowercase, uppercase, and mixed case \x{XX...} sequences.
+/*      The result value is the unquoted argument in case of success, a null
+/*      pointer otherwise.
+/*
+/*      uxtext_unquote_append() is like uxtext_unquote(), but appends
+/*      the conversion result to the result buffer.
+/*
+/*      When a conversion fails, the result buffer is restored to the
+/*      content that it had before the call.
+/* BUGS
+/*      This module cannot process null characters in data.
+/* LICENSE
+/* .ad
+/* .fi
+/*      The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*      Wietse Venema
+/*      IBM T.J. Watson Research
+/*      P.O. Box 704
+/*      Yorktown Heights, NY 10598, USA
+/*--*/
+
+/* System library. */
+
+#include <sys_defs.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Utility library. */
+
+#include "msg.h"
+#include "vstring.h"
+#include "uxtext.h"
+
+/* Application-specific. */
+
+#define STR(x)  vstring_str(x)
+#define LEN(x)  VSTRING_LEN(x)
+
+/* uxtext_fail - discard a partial result and report failure */
+
+static VSTRING *uxtext_fail(VSTRING *result, ssize_t saved_len)
+{
+    VSTRING_AT_OFFSET(result, saved_len);
+    VSTRING_TERMINATE(result);
+    return (0);
+}
+
+/* uxtext_quote_append - append unquoted data to quoted data */
+
+VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted,
+                             const char *special)
+{
+    ssize_t saved_len = LEN(quoted);
+    const unsigned char *cp;
+    int     ch;
+
+    for (cp = (const unsigned char *) unquoted; (ch = *cp) != 0; cp++) {
+        int     unicode;
+        int     min_unicode;
+        int     pick;
+
+        /*
+         * RFC 6533 QCHAR: printable ASCII except '\', '+' and '='. Those
+         * pass through unless the caller lists them in "special".
+         */
+        if (ch > 32 && ch < 127 && ch != '\\' && ch != '+' && ch != '='
+            && (*special == 0 || strchr(special, ch) == 0)) {
+            VSTRING_ADDCH(quoted, ch);
+            continue;
+        }
+
+        /*
+         * Everything else becomes \x{code point}. The lead byte gives the
+         * number of continuation bytes, and the smallest code point that
+         * may use that length (anything smaller is an overlong form).
+         */
+        if (ch < 0x80) {                        /* 0xxxxxxx */
+            unicode = ch;
+            min_unicode = 0;
+            pick = 0;
+        } else if ((ch & 0xe0) == 0xc0) {       /* 110xxxxx 10xxxxxx */
+            unicode = (ch & 0x1f);
+            min_unicode = 0x80;
+            pick = 1;
+        } else if ((ch & 0xf0) == 0xe0) {       /* 1110xxxx 10xxxxxx 10xxxxxx */
+            unicode = (ch & 0x0f);
+            min_unicode = 0x800;
+            pick = 2;
+        } else if ((ch & 0xf8) == 0xf0) {       /* 11110xxx 10xxxxxx (3x) */
+            unicode = (ch & 0x07);
+            min_unicode = 0x10000;
+            pick = 3;
+        } else {                                /* stray 10xxxxxx, or > 4 bytes */
+            return (uxtext_fail(quoted, saved_len));
+        }
+        while (pick-- > 0) {
+            ch = *++cp;
+            /* This also catches a null terminator inside a sequence. */
+            if ((ch & 0xc0) != 0x80)
+                return (uxtext_fail(quoted, saved_len));
+            unicode = (unicode << 6) | (ch & 0x3f);
+        }
+        /* uxtext_unquote_append() rejects these, so do not produce them. */
+        if (unicode < min_unicode || unicode > 0x10ffff
+            || (unicode >= 0xd800 && unicode <= 0xdfff))
+            return (uxtext_fail(quoted, saved_len));
+        vstring_sprintf_append(quoted, "\\x{%02X}", (unsigned) unicode);
+    }
+    VSTRING_TERMINATE(quoted);
+    return (quoted);
+}
+
+/* uxtext_quote - unquoted data to quoted */
+
+VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
+{
+    VSTRING_RESET(quoted);
+    return (uxtext_quote_append(quoted, unquoted, special));
+}
+
+/* uxtext_unquote_append - quoted data to unquoted */
+
+VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
+{
+    ssize_t saved_len = LEN(unquoted);
+    const char *cp;
+    int     ch;
+
+    for (cp = quoted; (ch = *cp) != 0; cp++) {
+        int     unicode = 0;
+        int     ndigits = 0;
+
+        /* Anything that does not start a \x{...} sequence is literal. */
+        if (ch != '\\' || cp[1] != 'x' || cp[2] != '{') {
+            VSTRING_ADDCH(unquoted, ch);
+            continue;
+        }
+        for (cp += 3; (ch = *cp) != '}'; cp++) {
+            if (ch >= '0' && ch <= '9')
+                unicode = (unicode << 4) + (ch - '0');
+            else if (ch >= 'a' && ch <= 'f')
+                unicode = (unicode << 4) + (ch - 'a' + 10);
+            else if (ch >= 'A' && ch <= 'F')
+                unicode = (unicode << 4) + (ch - 'A' + 10);
+            else                                /* includes the null terminator */
+                return (uxtext_fail(unquoted, saved_len));
+            /* Checked per digit, so the accumulator cannot overflow. */
+            if (unicode > 0x10ffff)
+                return (uxtext_fail(unquoted, saved_len));
+            ndigits++;
+        }
+
+        /*
+         * Reject "\x{}", a null character (see BUGS), and surrogates,
+         * which have no UTF-8 encoding.
+         */
+        if (ndigits == 0 || unicode == 0
+            || (unicode >= 0xd800 && unicode <= 0xdfff))
+            return (uxtext_fail(unquoted, saved_len));
+
+        /* Encode the code point as 1-4 bytes of UTF-8. */
+        if (unicode < 0x80) {
+            VSTRING_ADDCH(unquoted, unicode);
+        } else if (unicode < 0x800) {
+            VSTRING_ADDCH(unquoted, 0xc0 | (unicode >> 6));
+            VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
+        } else if (unicode < 0x10000) {
+            VSTRING_ADDCH(unquoted, 0xe0 | (unicode >> 12));
+            VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 6) & 0x3f));
+            VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
+        } else {
+            VSTRING_ADDCH(unquoted, 0xf0 | (unicode >> 18));
+            VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 12) & 0x3f));
+            VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 6) & 0x3f));
+            VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
+        }
+    }
+    VSTRING_TERMINATE(unquoted);
+    return (unquoted);
+}
+
+/* uxtext_unquote - quoted data to unquoted */
+
+VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted)
+{
+    VSTRING_RESET(unquoted);
+    return (uxtext_unquote_append(unquoted, quoted));
+}
+
+#ifdef TEST
+
+ /*
+  * Proof-of-concept test program: convert to quoted and back.
+  */
+#include <vstream.h>
+
+#define BUFLEN 1024
+
+static ssize_t read_buf(VSTREAM *fp, VSTRING *buf)
+{
+    ssize_t len;
+
+    VSTRING_RESET(buf);
+    len = vstream_fread(fp, STR(buf), vstring_avail(buf));
+    VSTRING_AT_OFFSET(buf, len);                /* XXX */
+    VSTRING_TERMINATE(buf);
+    return (len);
+}
+
+int     main(int unused_argc, char **unused_argv)
+{
+    VSTRING *unquoted = vstring_alloc(BUFLEN);
+    VSTRING *quoted = vstring_alloc(100);
+    ssize_t len;
+
+    while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) {
+        uxtext_quote(quoted, STR(unquoted), "+=");
+        if (uxtext_unquote(unquoted, STR(quoted)) == 0)
+            msg_fatal("bad input: %.100s", STR(quoted));
+        if (LEN(unquoted) != len)
+            msg_fatal("len %ld != unquoted len %ld",
+                      (long) len, (long) LEN(unquoted));
+        if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted))
+            msg_fatal("write error: %m");
+    }
+    vstream_fflush(VSTREAM_OUT);
+    vstring_free(unquoted);
+    vstring_free(quoted);
+    return (0);
+}
+
+#endif
diff --git a/src/global/uxtext.h b/src/global/uxtext.h
new file mode 100644
index 0000000..add8d39
--- /dev/null
+++ b/src/global/uxtext.h
@@ -0,0 +1,38 @@
+#ifndef _UXTEXT_H_INCLUDED_
+#define _UXTEXT_H_INCLUDED_
+
+/*++
+/* NAME
+/*      uxtext 3h
+/* SUMMARY
+/*      quote/unquote text, RFC 6533 style.
+/* SYNOPSIS
+/*      #include <uxtext.h>
+/* DESCRIPTION
+/* .nf
+
+ /*
+  * Utility library.
+  */
+#include <vstring.h>
+
+ /*
+  * External interface. The guard macro above is named after this file
+  * so that it cannot clash with the guard of any other header.
+  */
+extern VSTRING *uxtext_quote(VSTRING *, const char *, const char *);
+extern VSTRING *uxtext_quote_append(VSTRING *, const char *, const char *);
+extern VSTRING *uxtext_unquote(VSTRING *, const char *);
+extern VSTRING *uxtext_unquote_append(VSTRING *, const char *);
+
+/* LICENSE
+/* .ad
+/* .fi
+/*      The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/*      Wietse Venema
+/*      IBM T.J. Watson Research
+/*      P.O. 
Box 704 +/* Yorktown Heights, NY 10598, USA +/*--*/ + +#endif diff --git a/src/oqmgr/qmgr.h b/src/oqmgr/qmgr.h index 802ce12..e51b438 100644 --- a/src/oqmgr/qmgr.h +++ b/src/oqmgr/qmgr.h @@ -298,6 +298,7 @@ struct QMGR_MESSAGE { char *sender; /* complete address */ char *dsn_envid; /* DSN envelope ID */ int dsn_ret; /* DSN headers/full */ + int smtputf8; /* requires unicode */ char *verp_delims; /* VERP delimiters */ char *filter_xport; /* filtering transport */ char *inspect_xport; /* inspecting transport */ diff --git a/src/oqmgr/qmgr_deliver.c b/src/oqmgr/qmgr_deliver.c index ce81fd7..6d14a00 100644 --- a/src/oqmgr/qmgr_deliver.c +++ b/src/oqmgr/qmgr_deliver.c @@ -165,6 +165,7 @@ static int qmgr_deliver_send_request(QMGR_ENTRY *entry, VSTREAM *stream) ATTR_TYPE_STR, MAIL_ATTR_SENDER, sender, ATTR_TYPE_STR, MAIL_ATTR_DSN_ENVID, message->dsn_envid, ATTR_TYPE_INT, MAIL_ATTR_DSN_RET, message->dsn_ret, + ATTR_TYPE_INT, MAIL_ATTR_SMTPUTF8, message->smtputf8, ATTR_TYPE_FUNC, msg_stats_print, (void *) &stats, /* XXX Should be encapsulated with ATTR_TYPE_FUNC. */ ATTR_TYPE_STR, MAIL_ATTR_LOG_CLIENT_NAME, message->client_name, diff --git a/src/oqmgr/qmgr_message.c b/src/oqmgr/qmgr_message.c index e2f9237..0608356 100644 --- a/src/oqmgr/qmgr_message.c +++ b/src/oqmgr/qmgr_message.c @@ -168,6 +168,7 @@ static QMGR_MESSAGE *qmgr_message_create(const char *queue_name, message->sender = 0; message->dsn_envid = 0; message->dsn_ret = 0; + message->smtputf8 = 0; message->filter_xport = 0; message->inspect_xport = 0; message->redirect_addr = 0; @@ -605,6 +606,9 @@ static int qmgr_message_read(QMGR_MESSAGE *message) message->dsn_ret = n; } } + if (rec_type == REC_TYPE_SMTPUTF8) { + message->smtputf8 = 1; + } if (rec_type == REC_TYPE_ATTR) { /* Allow extra segment to override envelope segment info. 
*/ if (strcmp(name, MAIL_ATTR_ENCODING) == 0) { diff --git a/src/postscreen/postscreen_dnsbl.c b/src/postscreen/postscreen_dnsbl.c index ef3140b..3656c6f 100644 --- a/src/postscreen/postscreen_dnsbl.c +++ b/src/postscreen/postscreen_dnsbl.c @@ -241,7 +241,7 @@ static void psc_dnsbl_add_site(const char *site) if ((parse_err = ip_match_parse(byte_codes, pattern_text)) != 0) msg_fatal("bad DNSBL filter syntax: %s", parse_err); } - if (valid_hostname(saved_site, DO_GRIPE) == 0) + if (valid_hostname(saved_site, DO_GRIPE) == 0) /* no IDN support for DNSbls */ msg_fatal("bad DNSBL domain name \"%s\" in \"%s\"", saved_site, site); diff --git a/src/qmgr/qmgr.h b/src/qmgr/qmgr.h index 6737e42..c9699e7 100644 --- a/src/qmgr/qmgr.h +++ b/src/qmgr/qmgr.h @@ -268,7 +268,7 @@ extern void qmgr_queue_suspend(QMGR_QUEUE *, int); * "not throttled". It was natural to encode these in the queue window size. * After 10 years it's not practical to rip out all the working code and * change representations, so we just clean up the names a little. - * + * * Note: only the "ready" state can reach every state (including itself); * non-ready states can reach only the "ready" state. Other transitions are * forbidden, because they would result in dangling event handlers. 
@@ -343,6 +343,7 @@ struct QMGR_MESSAGE { char *sender; /* complete address */ char *dsn_envid; /* DSN envelope ID */ int dsn_ret; /* DSN headers/full */ + int smtputf8; /* requires unicode */ char *verp_delims; /* VERP delimiters */ char *filter_xport; /* filtering transport */ char *inspect_xport; /* inspecting transport */ diff --git a/src/qmgr/qmgr_deliver.c b/src/qmgr/qmgr_deliver.c index 2fbb049..841a227 100644 --- a/src/qmgr/qmgr_deliver.c +++ b/src/qmgr/qmgr_deliver.c @@ -170,6 +170,7 @@ static int qmgr_deliver_send_request(QMGR_ENTRY *entry, VSTREAM *stream) ATTR_TYPE_STR, MAIL_ATTR_SENDER, sender, ATTR_TYPE_STR, MAIL_ATTR_DSN_ENVID, message->dsn_envid, ATTR_TYPE_INT, MAIL_ATTR_DSN_RET, message->dsn_ret, + ATTR_TYPE_INT, MAIL_ATTR_SMTPUTF8, message->smtputf8, ATTR_TYPE_FUNC, msg_stats_print, (void *) &stats, /* XXX Should be encapsulated with ATTR_TYPE_FUNC. */ ATTR_TYPE_STR, MAIL_ATTR_LOG_CLIENT_NAME, message->client_name, diff --git a/src/qmgr/qmgr_message.c b/src/qmgr/qmgr_message.c index e6bbaf6..b4807cf 100644 --- a/src/qmgr/qmgr_message.c +++ b/src/qmgr/qmgr_message.c @@ -179,6 +179,7 @@ static QMGR_MESSAGE *qmgr_message_create(const char *queue_name, message->sender = 0; message->dsn_envid = 0; message->dsn_ret = 0; + message->smtputf8 = 0; message->filter_xport = 0; message->inspect_xport = 0; message->redirect_addr = 0; @@ -646,6 +647,9 @@ static int qmgr_message_read(QMGR_MESSAGE *message) message->dsn_ret = n; } } + if (rec_type == REC_TYPE_SMTPUTF8) { + message->smtputf8 = 1; + } if (rec_type == REC_TYPE_ATTR) { /* Allow extra segment to override envelope segment info. 
*/ if (strcmp(name, MAIL_ATTR_ENCODING) == 0) { diff --git a/src/sendmail/sendmail.c b/src/sendmail/sendmail.c index 83dac65..f23dc9f 100644 --- a/src/sendmail/sendmail.c +++ b/src/sendmail/sendmail.c @@ -609,6 +609,7 @@ static void output_header(void *context, int header_class, } } + /* enqueue - post one message */ static void enqueue(const int flags, const char *encoding, @@ -641,6 +642,7 @@ static void enqueue(const int flags, const char *encoding, const char *errstr; int addr_count; int level; + int smtputf8 = 0; static NAME_CODE sm_fix_eol_table[] = { SM_FIX_EOL_ALWAYS, STRIP_CR_DO, SM_FIX_EOL_STRICT, STRIP_CR_DUNNO, @@ -692,6 +694,8 @@ static void enqueue(const int flags, const char *encoding, saved_sender = mystrdup(sender); } + smtputf8 |= uses_utf_8(saved_sender); + /* * Let the postdrop command open the queue file for us, and sanity check * the content. XXX Make postdrop a manifest constant. @@ -760,6 +764,7 @@ static void enqueue(const int flags, const char *encoding, saved_sender, (long) uid); ++rcpt_count; ++addr_count; + smtputf8 |= uses_utf_8(STR(buf)); } } tok822_free_tree(tree); @@ -774,6 +779,17 @@ static void enqueue(const int flags, const char *encoding, } /* + * If either the sender or any recipients contain non-ascii + * characters, then this message has to be sent with the SMTPUTF8 + * extension. + */ + + if(smtputf8) { + rec_fprintf(dst, REC_TYPE_ATTR, "%s=%d", + MAIL_ATTR_SMTPUTF8, 1); + } + + /* * Append the message contents to the queue file. Write chunks of at most * 1kbyte. 
Internally, we use different record types for data ending in * LF and for data that doesn't, so we can actually be binary transparent diff --git a/src/smtp/smtp.h b/src/smtp/smtp.h index ee9e506..b576c0d 100644 --- a/src/smtp/smtp.h +++ b/src/smtp/smtp.h @@ -221,6 +221,7 @@ typedef struct SMTP_STATE { #define SMTP_FEATURE_XFORWARD_PORT (1<<18) #define SMTP_FEATURE_EARLY_TLS_MAIL_REPLY (1<<19) /* CVE-2009-3555 */ #define SMTP_FEATURE_XFORWARD_IDENT (1<<20) +#define SMTP_FEATURE_SMTPUTF8 (1<<21) /* * Features that passivate under the endpoint. diff --git a/src/smtp/smtp_addr.c b/src/smtp/smtp_addr.c index deba52f..ca426a8 100644 --- a/src/smtp/smtp_addr.c +++ b/src/smtp/smtp_addr.c @@ -96,6 +96,12 @@ #include +/* ICU, in order to do the right DNS lookups */ + +#ifndef NO_EAI +#include <unicode/uidna.h> +#endif + /* Application-specific. */ #include "smtp.h" @@ -340,6 +346,51 @@ static DNS_RR *smtp_truncate_self(DNS_RR *addr_list, unsigned pref) return (addr_list); } +/* smtp_domain_ascii - return an ASCII (IDNA) copy of a domain name + + The caller must myfree() the return value. 
+*/ + +char * smtp_domain_ascii(const char *name) +{ + char * aname; +#ifndef NO_EAI + unsigned char * cp = (unsigned char *)name; + + while (cp && *cp && *cp < 128) /* skip the leading ASCII part */ + cp++; + + if (cp && *cp >= 128) { /* found a non-ASCII byte */ + char buf[1024]; + UErrorCode error = U_ZERO_ERROR; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + UIDNA * idna; + int anl; + + idna = uidna_openUTS46(UIDNA_DEFAULT, &error); + anl = uidna_nameToASCII_UTF8(idna, + name, strlen(name), + buf, sizeof(buf), + &info, + &error); + uidna_close(idna); + if(U_SUCCESS(error) && info.errors == 0 && anl > 0) { + aname = mymalloc(anl+1); + memcpy(aname, buf, anl); /* buf need not be null-terminated */ + aname[anl] = 0; + return aname; + } else { + msg_info("smtp_domain_ascii: " + "Problem translating domain %s to IDNA form: %s (IDNA errors 0x%x)", + name, u_errorName(error), (unsigned) info.errors); + } + } +#endif + aname = mymalloc(strlen(name)+1); /* pure ASCII, no ICU, or conversion failed */ + strcpy(aname, name); + return aname; +} + /* smtp_domain_addr - mail exchanger address lookup */ DNS_RR *smtp_domain_addr(char *name, DNS_RR **mxrr, int misc_flags, @@ -351,6 +402,7 @@ DNS_RR *smtp_domain_addr(char *name, DNS_RR **mxrr, int misc_flags, unsigned best_pref; unsigned best_found; int r = 0; /* Resolver flags */ + char *aname; dsb_reset(why); /* Paranoia */ @@ -367,6 +419,8 @@ DNS_RR *smtp_domain_addr(char *name, DNS_RR **mxrr, int misc_flags, if (smtp_dns_support == SMTP_DNS_DNSSEC) r |= RES_USE_DNSSEC; + aname = smtp_domain_ascii(name); + /* * Look up the mail exchanger hosts listed for this name. Sort the * results by preference. Look up the corresponding host addresses, and @@ -409,21 +463,21 @@ DNS_RR *smtp_domain_addr(char *name, DNS_RR **mxrr, int misc_flags, * at hostnames provides a partial solution for MX hosts behind a NAT * gateway. 
*/ - switch (dns_lookup(name, T_MX, r, &mx_names, (VSTRING *) 0, why->reason)) { + switch (dns_lookup(aname, T_MX, r, &mx_names, (VSTRING *) 0, why->reason)) { default: dsb_status(why, "4.4.3"); if (var_ign_mx_lookup_err) - addr_list = smtp_host_addr(name, misc_flags, why); + addr_list = smtp_host_addr(aname, misc_flags, why); break; case DNS_INVAL: dsb_status(why, "5.4.4"); if (var_ign_mx_lookup_err) - addr_list = smtp_host_addr(name, misc_flags, why); + addr_list = smtp_host_addr(aname, misc_flags, why); break; case DNS_FAIL: dsb_status(why, "5.4.3"); if (var_ign_mx_lookup_err) - addr_list = smtp_host_addr(name, misc_flags, why); + addr_list = smtp_host_addr(aname, misc_flags, why); break; case DNS_OK: mx_names = dns_rr_sort(mx_names, dns_rr_compare_pref_any); @@ -472,13 +526,14 @@ DNS_RR *smtp_domain_addr(char *name, DNS_RR **mxrr, int misc_flags, } break; case DNS_NOTFOUND: - addr_list = smtp_host_addr(name, misc_flags, why); + addr_list = smtp_host_addr(aname, misc_flags, why); break; } /* * Clean up. */ + myfree(aname); *found_myself |= (self != 0); return (addr_list); } diff --git a/src/smtp/smtp_addr.h b/src/smtp/smtp_addr.h index cf0b689..6784628 100644 --- a/src/smtp/smtp_addr.h +++ b/src/smtp/smtp_addr.h @@ -16,6 +16,7 @@ /* * Internal interfaces. 
*/ +extern char *smtp_domain_ascii(const char *); extern DNS_RR *smtp_host_addr(const char *, int, DSN_BUF *); extern DNS_RR *smtp_domain_addr(char *, DNS_RR **, int, DSN_BUF *, int *); diff --git a/src/smtp/smtp_proto.c b/src/smtp/smtp_proto.c index fbae51f..bc4f673 100644 --- a/src/smtp/smtp_proto.c +++ b/src/smtp/smtp_proto.c @@ -233,6 +233,7 @@ char *xfer_request[SMTP_STATE_LAST] = { #define SMTP_MIME_DOWNGRADE(session, request) \ (var_disable_mime_oconv == 0 \ + && (request->flags & DEL_REQ_FLAG_SMTPUTF8) == 0 \ && (session->features & SMTP_FEATURE_8BITMIME) == 0 \ && strcmp(request->encoding, MAIL_ATTR_ENC_7BIT) != 0) @@ -547,6 +548,9 @@ int smtp_helo(SMTP_STATE *state) } else if (strcasecmp(word, "DSN") == 0) { if ((discard_mask & EHLO_MASK_DSN) == 0) session->features |= SMTP_FEATURE_DSN; + } else if (strcasecmp(word, "SMTPUTF8") == 0) { + if ((discard_mask & EHLO_MASK_SMTPUTF8) == 0) + session->features |= SMTP_FEATURE_SMTPUTF8; } n++; } @@ -1258,6 +1262,21 @@ static int smtp_loop(SMTP_STATE *state, NOCLOBBER int send_state, * The main protocol loop. */ do { + if (send_state == SMTP_STATE_MAIL && + (request->flags & DEL_REQ_FLAG_SMTPUTF8) && + !(session->features & SMTP_FEATURE_SMTPUTF8)) { + /* + * The message requires unicode, the server does not + * support that. This isn't going to work. + */ + smtp_site_fail(state, DSN_BY_LOCAL_MTA, + SMTP_RESP_FAKE(&fake, "5.6.7"), + "SMTPUTF8 is required, " + "but was not offered by host %s", + session->namaddr); + send_state = SMTP_STATE_ABORT; + break; + } /* * Build the next command. @@ -1377,6 +1396,9 @@ static int smtp_loop(SMTP_STATE *state, NOCLOBBER int send_state, vstring_sprintf_append(next_command, " RET=%s", dsn_ret_str(request->dsn_ret)); } + if (request->flags & DEL_REQ_FLAG_SMTPUTF8) { + vstring_strcat(next_command, " SMTPUTF8"); + } /* * We authenticate the local MTA only, but not the sender. 
@@ -1432,18 +1454,24 @@ static int smtp_loop(SMTP_STATE *state, NOCLOBBER int send_state, vstring_str(session->scratch)); if (session->features & SMTP_FEATURE_DSN) { /* XXX DSN xtext encode address value not type. */ - if (rcpt->dsn_orcpt[0]) { - xtext_quote(session->scratch, rcpt->dsn_orcpt, "+="); - vstring_sprintf_append(next_command, " ORCPT=%s", - vstring_str(session->scratch)); - } else if (rcpt->orig_addr[0]) { + const char * orcpt = rcpt->dsn_orcpt; + if (!*orcpt) { quote_822_local(session->scratch, rcpt->orig_addr); - vstring_sprintf(session->scratch2, "rfc822;%s", + vstring_sprintf(session->scratch2, "%s;%s", + uses_utf_8(vstring_str(session->scratch)) + ? "utf-8" : "rfc822", vstring_str(session->scratch)); - xtext_quote(session->scratch, vstring_str(session->scratch2), "+="); - vstring_sprintf_append(next_command, " ORCPT=%s", - vstring_str(session->scratch)); + orcpt = vstring_str(session->scratch2); + } + if (session->features & SMTP_FEATURE_SMTPUTF8) { + vstring_strcpy(session->scratch, orcpt); + } else if (uses_utf_8(orcpt)) { + uxtext_quote(session->scratch, orcpt, "\\"); + } else { + xtext_quote(session->scratch, orcpt, "+="); } + vstring_sprintf_append(next_command, " ORCPT=%s", + vstring_str(session->scratch)); if (rcpt->dsn_notify) vstring_sprintf_append(next_command, " NOTIFY=%s", dsn_notify_str(rcpt->dsn_notify)); diff --git a/src/smtp/smtp_tls_policy.c b/src/smtp/smtp_tls_policy.c index f280810..537e264 100644 --- a/src/smtp/smtp_tls_policy.c +++ b/src/smtp/smtp_tls_policy.c @@ -375,7 +375,7 @@ static void tls_policy_lookup(SMTP_TLS_POLICY *tls, int *site_level, * * XXX UNIX-domain connections query with the pathname as destination. 
*/ - if (!valid_hostname(site_name, DONT_GRIPE)) { + if (!valid_mail_domain(site_name, DONT_GRIPE)) { tls_policy_lookup_one(tls, site_level, site_name, site_class); return; } diff --git a/src/smtpd/smtpd.c b/src/smtpd/smtpd.c index fa01313..0739ae4 100644 --- a/src/smtpd/smtpd.c +++ b/src/smtpd/smtpd.c @@ -1791,6 +1791,8 @@ static int ehlo_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) EHLO_APPEND(state, "8BITMIME"); if ((discard_mask & EHLO_MASK_DSN) == 0) EHLO_APPEND(state, "DSN"); + if ((discard_mask & EHLO_MASK_SMTPUTF8) == 0) + EHLO_APPEND(state, "SMTPUTF8"); /* * Send the reply. @@ -1971,6 +1973,13 @@ static int mail_open_stream(SMTPD_STATE *state) MAIL_ATTR_ENCODING, state->encoding); /* + * Record that the message uses SMTPUTF8, if it so does. + */ + if (state->flags & SMTPD_FLAG_SMTPUTF8) + rec_fprintf(state->cleanup, REC_TYPE_ATTR, "%s=%d", + MAIL_ATTR_SMTPUTF8, 1); + + /* * Store client attributes. */ if (SMTPD_STAND_ALONE(state) == 0) { @@ -2292,6 +2301,8 @@ static int mail_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) smtpd_chat_reply(state, "552 5.3.4 Message size exceeds file system imposed limit"); return (-1); } + } else if (strcasecmp(arg, "SMTPUTF8") == 0) { /* RFC 6531 */ + state->flags |= SMTPD_FLAG_SMTPUTF8; #ifdef USE_SASL_AUTH } else if (strncasecmp(arg, "AUTH=", 5) == 0) { if ((err = smtpd_sasl_mail_opt(state, arg + 5)) != 0) { @@ -2408,6 +2419,16 @@ static int mail_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) return (-1); } } + if ((state->flags & SMTPD_FLAG_SMTPUTF8) == 0) { + unsigned char * cp = STR(state->addr_buf); + while (cp && *cp && *cp < 128) + cp++; + if(cp && *cp >= 128) { + mail_reset(state); + smtpd_chat_reply(state, "553 5.6.7 Must declare SMTPUTF8 to send from unicode addresses"); + return (-1); + } + } /* * Check the queue file space, if applicable. 
The optional before-filter @@ -2618,13 +2639,24 @@ static int rcpt_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) } vstring_strcpy(state->dsn_orcpt_buf, arg + 6); if (dsn_orcpt_addr - || (coded_addr = split_at(STR(state->dsn_orcpt_buf), ';')) == 0 - || xtext_unquote(state->dsn_buf, coded_addr) == 0 - || *(dsn_orcpt_type = STR(state->dsn_orcpt_buf)) == 0) { - state->error_mask |= MAIL_ERROR_PROTOCOL; - smtpd_chat_reply(state, - "501 5.5.4 Error: Bad ORCPT parameter syntax"); - return (-1); + || (coded_addr = split_at(STR(state->dsn_orcpt_buf), + ';')) == 0) { + state->error_mask |= MAIL_ERROR_PROTOCOL; + smtpd_chat_reply(state, + "501 5.5.4 Error: Bad ORCPT parameter syntax"); + return (-1); + } + /* A "utf-8" type address is uxtext-encoded; any other type is xtext-encoded. */ + dsn_orcpt_type = STR(state->dsn_orcpt_buf); + if (*dsn_orcpt_type == 0 + || (strcasecmp(dsn_orcpt_type, "utf-8") == 0 ? + uxtext_unquote(state->dsn_buf, coded_addr) == 0 : + xtext_unquote(state->dsn_buf, coded_addr) == 0)) { + state->error_mask |= MAIL_ERROR_PROTOCOL; + smtpd_chat_reply(state, + "501 5.5.4 Error: " + "Bad ORCPT parameter syntax"); + return (-1); } dsn_orcpt_addr = STR(state->dsn_buf); dsn_orcpt_addr_len = LEN(state->dsn_buf); @@ -2699,6 +2731,21 @@ static int rcpt_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) } /* + * Prevent mail to SMTPUTF8 addresses from senders who haven't declared + * that extension. + */ + if ((state->flags & SMTPD_FLAG_SMTPUTF8) == 0) { + unsigned char * cp = (unsigned char *) argv[2].strval; + while (cp && *cp && *cp < 128) + cp++; + if(cp && *cp >= 128) { + mail_reset(state); + smtpd_chat_reply(state, "553 5.6.7 Must declare SMTPUTF8 to send to unicode addresses"); + return (-1); + } + } + + /* * Store the recipient. Remember the first one.
* * Flush recipients to maintain a stiffer coupling with the next stage and @@ -2898,6 +2945,7 @@ static int data_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *unused_argv) const CLEANUP_STAT_DETAIL *detail; const char *rfc3848_sess; const char *rfc3848_auth; + const char *protoname; #ifdef USE_TLS VSTRING *peer_CN; @@ -3055,12 +3103,22 @@ static int data_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *unused_argv) else #endif rfc3848_auth = ""; + /* RFC 6531 defines an "UTF8" prefix for ESMTP and LMTP, but + * "UTF8" + "ESMTP" becomes "UTF8SMTP" in 6531's math, so we + * have to hack here to get the WITH clause correct. + */ + if ((state->flags & SMTPD_FLAG_SMTPUTF8) == 0) + protoname = state->protocol; + else if (state->protocol[0] == 'L') + protoname = "UTF8LMTP"; + else + protoname = "UTF8SMTP"; if (state->rcpt_count == 1 && state->recipient) { out_fprintf(out_stream, REC_TYPE_NORM, state->cleanup ? "\tby %s (%s) with %s%s%s id %s" : "\tby %s (%s) with %s%s%s", var_myhostname, var_mail_name, - state->protocol, rfc3848_sess, + protoname, rfc3848_sess, rfc3848_auth, state->queue_id); quote_822_local(state->buffer, state->recipient); out_fprintf(out_stream, REC_TYPE_NORM, @@ -3071,7 +3129,7 @@ static int data_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *unused_argv) state->cleanup ? 
"\tby %s (%s) with %s%s%s id %s;" : "\tby %s (%s) with %s%s%s;", var_myhostname, var_mail_name, - state->protocol, rfc3848_sess, + protoname, rfc3848_sess, rfc3848_auth, state->queue_id); out_fprintf(out_stream, REC_TYPE_NORM, "\t%s", mail_date(state->arrival_time.tv_sec)); @@ -3440,6 +3498,8 @@ static int vrfy_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) smtpd_chat_reply(state, "%s", err); return (-1); } + if (argc >= 3 && strcasecmp(argv[argc - 1].strval, "smtputf8") == 0) + argc--; if (argc > 2) collapse_args(argc - 1, argv + 1); if (extract_addr(state, argv + 1, REJECT_EMPTY_ADDR, SLOPPY) != 0) { @@ -3501,8 +3561,10 @@ static int etrn_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) argv[1].strval++; /* - * As an extension to RFC 1985 we also allow an RFC 2821 address literal - * enclosed in []. + * As an extension to RFC 1985 we also allow an RFC 2821 address + * literal enclosed in []. Note that RFC6531 extends the syntax to + * allow UTF8 in the argument, but forbids clients from using UTF8 + * in EHLO, so we call valid_hostname(). 
*/ if (!valid_hostname(argv[1].strval, DONT_GRIPE) && !valid_mailhost_literal(argv[1].strval, DONT_GRIPE)) { @@ -3673,7 +3735,7 @@ static int xclient_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) if (name_status != SMTPD_PEER_CODE_OK) { attr_value = CLIENT_NAME_UNKNOWN; } else { - if (!valid_hostname(attr_value, DONT_GRIPE)) { + if (!valid_mail_domain(attr_value, DONT_GRIPE)) { state->error_mask |= MAIL_ERROR_PROTOCOL; smtpd_chat_reply(state, "501 5.5.4 Bad %s syntax: %s", XCLIENT_NAME, attr_value); @@ -3698,7 +3760,7 @@ static int xclient_cmd(SMTPD_STATE *state, int argc, SMTPD_TOKEN *argv) if (name_status != SMTPD_PEER_CODE_OK) { attr_value = CLIENT_NAME_UNKNOWN; } else { - if (!valid_hostname(attr_value, DONT_GRIPE)) { + if (!valid_mail_domain(attr_value, DONT_GRIPE)) { state->error_mask |= MAIL_ERROR_PROTOCOL; smtpd_chat_reply(state, "501 5.5.4 Bad %s syntax: %s", XCLIENT_REVERSE_NAME, attr_value); @@ -4671,6 +4733,9 @@ static void smtpd_proto(SMTPD_STATE *state) || (ehlo_words = maps_find(ehlo_discard_maps, state->addr, 0)) == 0) ehlo_words = var_smtpd_ehlo_dis_words; state->ehlo_discard_mask = ehlo_mask(ehlo_words); +#ifdef NO_EAI + state->ehlo_discard_mask |= EHLO_MASK_SMTPUTF8; +#endif /* XXX We use the real client for connect access control. */ if (SMTPD_STAND_ALONE(state) == 0 diff --git a/src/smtpd/smtpd.h b/src/smtpd/smtpd.h index 8bd1176..40af759 100644 --- a/src/smtpd/smtpd.h +++ b/src/smtpd/smtpd.h @@ -190,6 +190,7 @@ typedef struct { #define SMTPD_FLAG_HANGUP (1<<0) /* 421/521 disconnect */ #define SMTPD_FLAG_ILL_PIPELINING (1<<1) /* inappropriate pipelining */ #define SMTPD_FLAG_AUTH_USED (1<<2) /* don't reuse SASL state */ +#define SMTPD_FLAG_SMTPUTF8 (1<<3) /* message uses RFC 6531/2 */ /* Security: don't reset SMTPD_FLAG_AUTH_USED. 
*/ #define SMTPD_MASK_MAIL_KEEP ~0 /* keep all after MAIL reset */ diff --git a/src/smtpd/smtpd_check.c b/src/smtpd/smtpd_check.c index 79175ee..3408d74 100644 --- a/src/smtpd/smtpd_check.c +++ b/src/smtpd/smtpd_check.c @@ -1174,7 +1174,7 @@ static int reject_invalid_hostname(SMTPD_STATE *state, char *name, /* * Validate the hostname. */ - if (!valid_hostname(test_name, DONT_GRIPE) + if (!valid_mail_domain(test_name, DONT_GRIPE) && !valid_hostaddr(test_name, DONT_GRIPE)) /* XXX back compat */ stat = smtpd_check_reject(state, MAIL_ERROR_POLICY, var_bad_name_code, "5.5.2", @@ -1212,7 +1212,7 @@ static int reject_non_fqdn_hostname(SMTPD_STATE *state, char *name, /* * Validate the hostname. */ - if (!valid_hostname(test_name, DONT_GRIPE) || !strchr(test_name, '.')) + if (!valid_mail_domain(test_name, DONT_GRIPE) || !strchr(test_name, '.')) stat = smtpd_check_reject(state, MAIL_ERROR_POLICY, var_non_fqdn_code, "5.5.2", "<%s>: %s rejected: need fully-qualified hostname", @@ -1837,7 +1837,9 @@ static int reject_non_fqdn_address(SMTPD_STATE *state, char *addr, /* * Validate the domain. */ - if (!*test_dom || !valid_hostname(test_dom, DONT_GRIPE) || !strchr(test_dom, '.')) + if (!*test_dom || + !valid_mail_domain(test_dom, DONT_GRIPE) || + !strchr(test_dom, '.')) stat = smtpd_check_reject(state, MAIL_ERROR_POLICY, var_non_fqdn_code, "4.5.2", "<%s>: %s rejected: need fully-qualified address", @@ -3382,7 +3384,7 @@ static const SMTPD_RBL_STATE *find_dnsxl_domain(SMTPD_STATE *state, * the name has an alphanumerical prefix. We play safe, and skip both * RHSBL and RHSWL queries for names ending in a numerical suffix. */ - if (domain[0] == 0 || valid_hostname(domain, DONT_GRIPE) == 0) + if (domain[0] == 0 || valid_mail_domain(domain, DONT_GRIPE) == 0) return (SMTPD_CHECK_DUNNO); suffix = strrchr(domain, '.'); if (alldig(suffix == 0 ? 
domain : suffix + 1)) diff --git a/src/trivial-rewrite/resolve.c b/src/trivial-rewrite/resolve.c index d9a709e..a0855c3 100644 --- a/src/trivial-rewrite/resolve.c +++ b/src/trivial-rewrite/resolve.c @@ -382,7 +382,7 @@ static void resolve_addr(RES_CONTEXT *rp, char *sender, char *addr, if (*rcpt_domain == '[') { if (!valid_mailhost_literal(rcpt_domain, DONT_GRIPE)) *flags |= RESOLVE_FLAG_ERROR; - } else if (!valid_hostname(rcpt_domain, DONT_GRIPE)) { + } else if (!valid_mail_domain(rcpt_domain, DONT_GRIPE)) { if (var_resolve_num_dom && valid_hostaddr(rcpt_domain, DONT_GRIPE)) { vstring_insert(nextrcpt, rcpt_domain - STR(nextrcpt), "[", 1); vstring_strcat(nextrcpt, "]"); diff --git a/src/util/Makefile.in b/src/util/Makefile.in index 5ab2232..6813de5 100644 --- a/src/util/Makefile.in +++ b/src/util/Makefile.in @@ -27,7 +27,7 @@ SRCS = alldig.c allprint.c argv.c argv_split.c attr_clnt.c attr_print0.c \ sys_compat.c timed_connect.c timed_read.c timed_wait.c timed_write.c \ translit.c trimblanks.c unescape.c unix_connect.c unix_listen.c \ unix_recv_fd.c unix_send_fd.c unix_trigger.c unsafe.c uppercase.c \ - username.c valid_hostname.c vbuf.c vbuf_print.c vstream.c \ + username.c uses_utf_8.c valid_hostname.c vbuf.c vbuf_print.c vstream.c \ vstream_popen.c vstring.c vstring_vstream.c watchdog.c \ write_buf.c sane_basename.c format_tv.c allspace.c \ allascii.c load_file.c killme_after.c vstream_tweak.c \ @@ -65,7 +65,7 @@ OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \ sys_compat.o timed_connect.o timed_read.o timed_wait.o timed_write.o \ translit.o trimblanks.o unescape.o unix_connect.o unix_listen.o \ unix_recv_fd.o unix_send_fd.o unix_trigger.o unsafe.o uppercase.o \ - username.o valid_hostname.o vbuf.o vbuf_print.o vstream.o \ + username.o uses_utf_8.o valid_hostname.o vbuf.o vbuf_print.o vstream.o \ vstream_popen.o vstring.o vstring_vstream.o watchdog.o \ write_buf.o sane_basename.o format_tv.o allspace.o \ allascii.o load_file.o 
killme_after.o vstream_tweak.o \ diff --git a/src/util/host_port.c b/src/util/host_port.c index 644d8b7..e5c6eee 100644 --- a/src/util/host_port.c +++ b/src/util/host_port.c @@ -154,7 +154,7 @@ const char *host_port(char *buf, char **host, char *def_host, * Final sanity checks. We're still sloppy, allowing bare numerical * network addresses instead of requiring proper [ipaddress] forms. */ - if (*host != def_host && !valid_hostname(*host, DONT_GRIPE) + if (*host != def_host && !valid_mail_domain(*host, DONT_GRIPE) && !valid_hostaddr(*host, DONT_GRIPE)) return ("valid hostname or network address required"); if (*port != def_service && ISDIGIT(**port) && !alldig(*port)) diff --git a/src/util/printable.c b/src/util/printable.c index 9e27f94..8ea875b 100644 --- a/src/util/printable.c +++ b/src/util/printable.c @@ -41,11 +41,22 @@ char *printable(char *string, int replacement) { - char *cp; + unsigned char *cp; int ch; - for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) - if (!ISASCII(ch) || !ISPRINT(ch)) + cp = (unsigned char *)string; + while((ch = *cp) != 0) { + if (ISASCII(ch) && ISPRINT(ch)) { + /* Printable ASCII: keep the byte as is. */ + } else if (ch >= 194 && ch <= 254 && cp[1] >= 128 && cp[1] < 192) { + /* UTF-8 lead byte followed by a continuation byte: skip the remaining continuation bytes (128-191) of this character. */ + while (cp[1] >= 128 && cp[1] < 192) + cp++; + } else { + /* Definitely not OK: overwrite the byte with the replacement. */ *cp = replacement; + } + cp++; + } return (string); } diff --git a/src/util/stringops.h b/src/util/stringops.h index 85d2a74..b347779 100644 --- a/src/util/stringops.h +++ b/src/util/stringops.h @@ -42,6 +42,7 @@ extern int allspace(const char *); extern int allascii(const char *); extern const char *split_nameval(char *, char **, char **); extern int valid_utf_8(const char *, ssize_t); +extern int uses_utf_8(const char *); /* LICENSE /* .ad diff --git a/src/util/uses_utf_8.c b/src/util/uses_utf_8.c new file mode 100644 index 0000000..cbb801b --- /dev/null +++ b/src/util/uses_utf_8.c @@ -0,0 +1,51 @@ +/*++ +/* NAME +/* uses_utf_8 3 +/*
SUMMARY +/* predicate if string is UTF-8 +/* SYNOPSIS +/* #include +/* +/* int uses_utf_8(str) +/* const char *str; +/* +/* DESCRIPTION +/* uses_utf_8() determines if a string contains at least one byte +/* with the high bit set. It does not actually verify that the string +/* is valid UTF-8, merely that where UTF-8 is treated differently from +/* ASCII, the UTF-8 path must be taken. +/* +/* A zero-length string is considered to be ASCII. +/* DIAGNOSTICS +/* The result value is zero when the caller specifies an empty string +/* or one that contains only bytes in the range 0-127, and one if +/* at least one byte is in the range 128-255. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/*--*/ + +/* System library. */ + +#include + +/* Utility library. */ + +#include + +/* uses_utf_8 - check whether a string needs UTF-8 treatment. */ + +int uses_utf_8(const char *str) +{ + const unsigned char * cp = (const unsigned char *)str; + while (cp && *cp) + if (*cp++ >= 128) + return 1; + return 0; +} diff --git a/src/util/valid_hostname.c b/src/util/valid_hostname.c index 7a40d6e..9790f90 100644 --- a/src/util/valid_hostname.c +++ b/src/util/valid_hostname.c @@ -162,6 +162,118 @@ int valid_hostname(const char *name, int gripe) return (1); } + +/* valid_mail_domain - screen out obviously bad mail domains. + + With RFC6531 support, hostnames are no longer equal to the + right-hand side of an email address. The precise rules for what + constitutes a valid right-hand side are complex; 0xC0 0x80 is + always illegal (being an overlong UTF8 form), but 0xC0 0xB0 may or + may not be legal, depending on the rules of the TLD registry. + + This function is somewhat robust, but not enough that + e.g. valid_rr_name() ought to call it.
+ */ + +int valid_mail_domain(const char *name, int gripe) +{ + const char *myname = "valid_mail_domain"; + const unsigned char *cp; + int label_length = 0; + int label_count = 0; + int non_numeric = 0; + int ch; + + /* + * Trivial cases first. + */ + if (*name == 0) { + if (gripe) + msg_warn("%s: empty hostname", myname); + return (0); + } + + /* + * Find bad characters or label lengths. Find adjacent delimiters. + */ + cp = (const unsigned char *) name; + while ((ch = *cp) != 0) { + if (ISALNUM(ch) || ch == '_' || (ch >= 194 && ch < 254 && cp[1] >= 128 && cp[1] < 192)) { + if (label_length == 0) + label_count++; + label_length++; + if (label_length > VALID_LABEL_LEN) { + if (gripe) + msg_warn("%s: hostname label too long: %.100s", myname, name); + return (0); + } + if (!ISDIGIT(ch)) + non_numeric = 1; + if (ch >= 194) { + /* UTF8 consists of one byte >= 194 and then one or + more in the range 128-191. Bytes 192 and 193 are + only used by nogoodniks (192,128 is an illegal way + to encode a null character) and I think it's best + to deny them: There are no MTAs that generate that + and with which we need to be compatible. + + There are many ways to play games with UTF8 and + Unicode that we cannot possibly detect here, so + we'll just test that the sequence looks vaguely + plausible and leave the rest to ICU + (see callers of uidna_nameToASCII_UTF8()).
+ */ + while (cp[1] >= 128 && cp[1] < 192) + cp++; + } + } else if (ch == '.') { + if (label_length == 0 || cp[1] == 0) { + if (gripe) + msg_warn("%s: misplaced delimiter: %.100s", myname, name); + return (0); + } + label_length = 0; + } else if (ch == '-') { + non_numeric = 1; + label_length++; + if (label_length == 1 || cp[1] == 0 || cp[1] == '.') { + if (gripe) + msg_warn("%s: misplaced hyphen: %.100s", myname, name); + return (0); + } + } +#ifdef SLOPPY_VALID_HOSTNAME + else if (ch == ':' && valid_ipv6_hostaddr(name, DONT_GRIPE)) { + non_numeric = 0; + break; + } +#endif + else { + if (gripe) + msg_warn("%s: invalid character %d(decimal): %.100s", + myname, ch, name); + return (0); + } + cp++; + } + + if (non_numeric == 0) { + if (gripe) + msg_warn("%s: numeric hostname: %.100s", myname, name); +#ifndef SLOPPY_VALID_HOSTNAME + return (0); +#endif + } + if ((const char *)cp - name > VALID_HOSTNAME_LEN) { + if (gripe) + msg_warn("%s: bad length %d for %.100s...", + myname, (int) ((const char *)cp - name), name); + return (0); + } + return (1); +} + + /* valid_hostaddr - verify numerical address syntax */ int valid_hostaddr(const char *addr, int gripe) diff --git a/src/util/valid_hostname.h b/src/util/valid_hostname.h index b06fc17..7b8465f 100644 --- a/src/util/valid_hostname.h +++ b/src/util/valid_hostname.h @@ -24,6 +24,7 @@ extern int valid_hostaddr(const char *, int); extern int valid_ipv4_hostaddr(const char *, int); extern int valid_ipv6_hostaddr(const char *, int); extern int valid_hostport(const char *, int); +extern int valid_mail_domain(const char *, int); /* LICENSE /* .ad