#!/usr/bin/perl -w =head1 NAME uri_badip - URI address range check $Id: uri_badip 399 2004-10-22 07:37:04Z aqua $ =head1 DESCRIPTION This plugin scans the body of an email for URIs, flagging or rejecting any whose host portion resolves to an IP found in a configured set of CIDR ranges. This enables one to block spam advertising websites hosted by spam-friendly ISPs, without needing to know about the specific sites in advance. This approach is very prone to false-positives in some contexts, and should be used with care. It is probably best combined with SpamAssassin or other scoring system, configured to match the header added by this plugin. Much of this is based on uribl, the plugin written for use with the SURBL URI blacklist, and the general idea comes from there. =head1 CONFIGURATION To enable the plugin, add it to I<~qpsmtpd/config/plugins>. The list of rejected IP ranges, in CIDR format, should be given in I<~qpsmtpd/config/uri_badip>. You may specify the following configuration option(s): =over 4 =item action Specifies what to do when a URI is matched in a rejected IP range. Available options are I (the default) I and I. If set to add-header, an X-URI-Address-Match: header will be added explaining the problem. If set to 'deny,' the delivery will be declined with a hard failure. If set to denysoft, the delivery will be soft failed (this is probably not a good idea.) =item scan-headers If set true, any headers found in the URIs will be checked as well. Disabled by default. =item fold-domains-at-depth When noticing a URL whose hostname exceeds this number of subdomains, drop subdomains exceeding this depth before issuing the DNS lookup. For example, with a setting of 3, foo.bar.snaf.com will be resolved as if it were bar.snaf.com, and a subsequent gurgle.bar.snaf.com would be ignored. If set to a low value, such as 2, this can mitigate attempts by spammers to decoy URI checks by inserting a great many URIs with randomized subdomains to incur many namesever lookups. See also I and I. =item fold-domains-to-original If set to a true value, when a domain is encountered exceeding the length specified by I, rather than truncating the domain to that depth, the first match is queried as-is, but subsequent URIs found with hostnames having the same upper-level domains will be skipped. =item limit Imposes a strict limit on the number of lookups to be performed; if a message exceeds this number of unique hostnames, only the first I of them will be checked. This can eliminate the potential for DoS attacks or general slowness. Note that if this is set too low, the test is more vulnerable to being spoofed by spam that mentions a large number of innocent bystander URIs at the outset (e.g. in an invisible HTML section or comment), then places the real spamvertised URIs later on. =back =head1 BUGS Does not yet detect URIs munged by URI percentage-style escape codes, and may miss some other munging as well. Does not attempt to pick out hostnames not found in URIs with a protocol component, such as a bare "www.domain.tld". =head1 LICENSE Copyright (c) 2004 by Devin Carraway . Newer versions may be obtained from http://devin.com/qpsmtpd/. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =cut use Net::CIDR; use Net::DNS::Resolver; use IO::Select; use Time::HiRes qw(time); use strict; use warnings; sub register { my ($self, $qp, %args) = @_; $self->{action} = $args{action} || 'add-header'; $self->{timeout} = $args{timeout} || 30; $self->{record_type} = $args{'record-type'} || 'A'; $self->{check_headers} = $args{'check-headers'}; $self->{fold_domain_depth} = $args{'fold-domains-at-depth'} || 3; $self->{fold_domain_orig} = $args{'fold-domains-to-original'}; $self->{lookup_limit} = $args{'limit'}; $self->register_hook('data_post', 'data_handler'); } sub data_handler { my ($self, $txn) = @_; my $l; my (%lookups, %lookups_exist); my @addresses; my @ranges = $self->qp->config('uri_badip'); @ranges or return DECLINED; my $res; my $select; my @qp_continuations; my $limit = $self->{lookup_limit} || -1; my $fold = $self->{fold_domain_depth}; my $fold_excess_pat; my %fold_skip; if ($fold) { my $pat = '[\\w\\-]\\.('; $pat .= '[\\w\\-]+\\.' for (1..($fold-1)); $pat .= '\\w{2,4})'; $fold_excess_pat = qr/$pat/; } for (0..$#ranges) { if ($ranges[$_] =~ /(\S+)(?:\s(.*))?/) { $ranges[$_] = [ $1, $2 ]; } } my $cutoff_time = time + $self->{timeout}; my $first_query_time; $txn->body_resetpos; while (!$self->{check_headers} and $l = $txn->body_getline) { chomp $l; last if !$l; } LINE: while ($l = $txn->body_getline) { chomp $l; if ($l =~ /(.*)=$/) { push @qp_continuations, $1; } elsif (@qp_continuations) { $l = join('', @qp_continuations, $l); @qp_continuations = (); } # Undo URI and quoted-printable escape munging $l =~ s/[=%]([0-9A-Fa-f]{2,2})/chr(hex($1))/ge; # Undo HTML entity munging (e.g. in parameterized redirects) $l =~ s/&#(\d{2,3});?/chr($1)/ge; while ($l =~ m{ \w{3,16}:/+ # protocol (?:\S+@)? # user/pass (\d{7,}) # raw-numeric IP (?::\d+)?([/?\s]|$) # port, slash # or EOL }gx) { my @octets = ( (($1 >> 24) & 0xff), (($1 >> 16) & 0xff), (($1 >> 8) & 0xff), ($1 & 0xff) ); my $fwd = join('.', @octets); $self->log(LOGDEBUG, "matched pure-integer ipaddr $1 ($fwd)"); if (my $note = _find($fwd, \ @ranges)) { $self->log(LOGNOTICE, $note); if ($self->{action} eq 'add-header') { $txn->header->add('X-URI-Address-Match', $note); } elsif ($self->{action} eq 'deny') { return (DENY, $note); } elsif ($self->{action} eq 'denysoft') { return (DENYSOFT, $note); } } } while ($l =~ m{ \w{3,16}:/+ # protocol (?:\S+@)? # user/pass (\d+|0[xX][0-9A-Fa-f]+)\. # IP address (\d+|0[xX][0-9A-Fa-f]+)\. (\d+|0[xX][0-9A-Fa-f]+)\. (\d+|0[xX][0-9A-Fa-f]+) }gx) { my @octets = ($1,$2,$3,$4); # return any octal/hex octets in the IP addr back # to decimal form (e.g. http://0x7f.0.0.00001) for (0..$#octets) { $octets[$_] =~ s/^0([0-7]+)$/oct($1)/e; $octets[$_] =~ s/^0x([0-9a-fA-F]+)$/hex($1)/e; } my $fwd = join('.', @octets); $self->log(LOGDEBUG, "matched URI ipaddr $fwd"); if (my $note = _find($fwd, \ @ranges)) { $self->log(LOGNOTICE, $note); if ($self->{action} eq 'add-header') { $txn->header->add('X-URI-Address-Match', $note); } elsif ($self->{action} eq 'deny') { return (DENY, $note); } elsif ($self->{action} eq 'denysoft') { return (DENYSOFT, $note); } } } HOST: while ($limit && $l =~ m{ \w{3,16}:/+ # protocol (?:\S+@)? # user/pass ([\w\-.]+\.[a-zA-Z]{2,8}) # hostname }gx) { my $host = $1; $self->log(LOGDEBUG, "matched URI hostname $host"); if ($fold) { if ($host =~ /$fold_excess_pat/) { my $folded = $1; $self->log(LOGDEBUG, "folding $host -> $folded"); if ($self->{fold_domain_orig}) { if ($fold_skip{$folded}) { $self->log(LOGDEBUG, "Skipping $host; lookup of $fold_skip{$folded} already underway"); next HOST; } $fold_skip{$folded} = $host; } $host = $folded; } } if ($lookups_exist{$host}) { $self->log(LOGDEBUG, "Skipping $host; lookup already underway"); next HOST; } $lookups_exist{$host} = 1; unless ($res) { $res = new Net::DNS::Resolver or return DECLINED; $res->udp_timeout($cutoff_time - time); $first_query_time = time; } my $sock = $res->bgsend($host, $self->{record_type}); $lookups{"$sock"} = $host; $select ||= new IO::Select; $select->add($sock); $limit--; } } $txn->body_resetpos; unless ($select) { $self->log(LOGINFO, "No URIs found in mail"); return DECLINED; } my $c = 0; $self->log(LOGNOTICE, "Waiting on ".$select->count." lookup(s)"); while ($select->count > 0) { my $remaining = $cutoff_time - time; $remaining = 0 if $remaining < 0; $self->log(LOGDEBUG, "cutoff in $remaining sec"); my @ready = $select->can_read($remaining); unless (@ready) { my $n = "DNS timeout waiting for ". $select->count. " lookup(s): ". join(', ', sort values %lookups); $self->log(LOGNOTICE, $n); last; } for my $s (@ready) { $select->remove($s); my $packet = $res->bgread($s) || next; $c++; ANSWER: for my $rr ($packet->answer) { next ANSWER unless $rr->type eq $self->{record_type}; my $addr = $rr->address or next ANSWER; my $name = $lookups{"$s"}; $self->log(LOGNOTICE, "URI host $name -> $addr"); if (my $msg = _find($addr, \ @ranges, $s)) { my $note = "$name/$addr in blacklisted range ($msg)"; $self->log(LOGNOTICE, $note); if ($self->{action} eq 'add-header') { $txn->header->add('X-URI-Address-Match', $note); } elsif ($self->{action} eq 'deny') { return (DENY, $note); } elsif ($self->{action} eq 'denysoft') { return (DENYSOFT, $note); } } } } if (!$select->count) { my $elapsed = time - $first_query_time; $self->log(LOGINFO, sprintf("$c lookup(s) finished in %.2f sec", $elapsed)); } } return DECLINED; } sub _find { my $ip = shift || return undef; my $ranges = shift || return undef; my $hostname = shift; for (@{$ranges}) { if (Net::CIDR::cidrlookup($ip, $_->[0])) { return $_->[1] || $_->[0] || 'blacklisted range'; } } return ''; } 1;