File: /volume1/@appstore/MailPlus-Server/lib/MIMEDefang/MailPlusServer/PhishingLinkChecker.pm
package MailPlusServer::PhishingLinkChecker;
use strict;
use warnings;
use MailPlusServer::Log;
use MailPlusServer::Util;
# Phishing blacklist/whitelist
$MailPlusServer::PhishingLinkChecker::PhishingBlacklist = '/var/packages/MailPlus-Server/target/etc/mimedefang/phishing.bad.sites.conf';
$MailPlusServer::PhishingLinkChecker::PhishingWhitelist = '/var/packages/MailPlus-Server/target/etc/mimedefang/phishing.safe.sites.conf';
use vars qw(%PhishingWhitelist); # Whitelist of hostnames for Phishing Net
use vars qw(%PhishingBlacklist); # Blacklist of hostnames for Phishing Net
my %CharToInternational = (
160,'nbsp',
161,'iexcl',
162,'cent',
163,'pound',
164,'curren',
165,'yen',
166,'brvbar',
167,'sect',
168,'uml',
169,'copy',
170,'ordf',
171,'laquo',
172,'not',
173,'shy',
174,'reg',
175,'macr',
176,'deg',
177,'plusmn',
178,'sup2',
179,'sup3',
180,'acute',
181,'micro',
182,'para',
183,'middot',
184,'cedil',
185,'sup1',
186,'ordm',
187,'raquo',
188,'frac14',
189,'frac12',
190,'frac34',
191,'iquest',
192,'Agrave',
193,'Aacute',
194,'Acirc',
195,'Atilde',
196,'Auml',
197,'Aring',
198,'AElig',
199,'Ccedil',
200,'Egrave',
201,'Eacute',
202,'Ecirc',
203,'Euml',
204,'Igrave',
205,'Iacute',
206,'Icirc',
207,'Iuml',
208,'ETH',
209,'Ntilde',
210,'Ograve',
211,'Oacute',
212,'Ocirc',
213,'Otilde',
214,'Ouml',
215,'times',
216,'Oslash',
217,'Ugrave',
218,'Uacute',
219,'Ucirc',
220,'Uuml',
221,'Yacute',
222,'THORN',
223,'szlig',
224,'agrave',
225,'aacute',
226,'acirc',
227,'atilde',
228,'auml',
229,'aring',
230,'aelig',
231,'ccedil',
232,'egrave',
233,'eacute',
234,'ecirc',
235,'euml',
236,'igrave',
237,'iacute',
238,'icirc',
239,'iuml',
240,'eth',
241,'ntilde',
242,'ograve',
243,'oacute',
244,'ocirc',
245,'otilde',
246,'ouml',
247,'divide',
248,'oslash',
249,'ugrave',
250,'uacute',
251,'ucirc',
252,'uuml',
253,'yacute',
254,'thorn',
255,'yuml'
);
# Turn any character into an international version of it if it is in the range
# 160 to 255.
sub CharToIntnl {
my $p = shift @_;
# Passed in an 8-bit character.
#print STDERR "Char in is $p\n";
($a) = unpack 'C', $p;
#print STDERR "Char is $a, $p\n";
# Bash char 160 (space) to nothing
return '' if $a == 160;
my $char = $CharToInternational{$a};
return '&' . $char . ';' if $char ne "";
return $p;
}
# Like CharToIntnl but does entire string
# Return undefined if input is empty string, due to unpacking with empty string
sub StringToIntnl {
my $original = shift;
# Much faster char conversion for whole strings
my(@newlinkurl, $newlinkurl, $char);
@newlinkurl = unpack("C*", $original); # Get an array of characters
foreach (@newlinkurl) {
next if $_ == 160;
$char = $CharToInternational{$_};
if (defined $char) {
$newlinkurl .= '&' . $char . ';';
} else {
$newlinkurl .= chr($_);
}
}
return $newlinkurl;
#$linkurl = $newlinkurl unless $newlinkurl eq "";
#$linkurl =~ s/./CharToIntnl("$&")/ge; -- Old slow version
}
# Clean up a link URL so it is suitable for phishing detection
# Return (clean url, alarm trigger value). An alarm trigger value non-zero
# means this is definitely likely to be a phishing trap, no matter what
# anything else says.
sub CleanLinkURL {
my($DisarmLinkURL, $DisarmBaseURL) = @_;
use bytes;
my($linkurl,$alarm);
$alarm = 0;
$linkurl = $DisarmLinkURL;
$linkurl = lc($linkurl);
$linkurl =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
$linkurl = StringToIntnl($linkurl);
return ("",0) unless $linkurl =~ /[.\/]/; # Ignore if it is not a website at all
$linkurl =~ s/\s+//g; # Remove any whitespace
$linkurl =~ s/\\/\//g; # Change \ to / as many browsers do this
return ("",0) if $linkurl =~ /\@/ && $linkurl !~ /\//; # Ignore emails
$linkurl =~ s/[,.]+$//; # Remove trailing dots, but also commas while at it
$linkurl =~ s/^\[\d*\]//; # Remove leading [numbers]
$linkurl =~ s/^blocked[:\/]+//i; # Remove "blocked::" labels
$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 2
$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 3
$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 4
$linkurl =~ s/^outbind:\/\/\d+\//http:\/\//i; # Remove "outbind://22/" type labels
$linkurl = $DisarmBaseURL . '/' . $linkurl
if $linkurl ne "" && $DisarmBaseURL ne "" &&
$linkurl !~ /^(https?|ftp|mailto|webcal):/i;
$linkurl =~ s/^(https?:\/\/[^:]+):80($|\D)/$1/i; # Remove http://....:80
$linkurl =~ s/^(https?|ftp|webcal)[:;]\/\///i;
return ("",0) if $linkurl =~ /^ma[il]+to[:;]/i;
$linkurl =~ s/[?\/].*$//; # Only compare up to the first '/' or '?'
$linkurl =~ s/(\<\/?(br|p|ul)\>)*$//ig; # Remove trailing br, p, ul tags
return ("",0) if $linkurl =~ /^file:/i; # Ignore file: URLs completely
return ("",0) if $linkurl =~ /^#/; # Ignore internal links completely
$linkurl =~ s/\/$//; # LinkURL is trimmed -- note
$alarm = 1 if $linkurl =~ s/[\x00-\x1f[:^ascii:]]/_BAD_/g; # /\&\#/;
$linkurl = 'JavaScript' if $linkurl =~ /^javascript:/i;
($linkurl, $alarm);
}
sub FetchPhishingLink {
my($DisarmLinkText, $DisarmLinkURL, $DisarmBaseURL, $PhishingWarningText) = @_;
my($squashedtext,$linkurl,$alarm,$numbertrap);
$squashedtext = lc($DisarmLinkText);
return '' if $squashedtext eq '';
# Try to filter out mentions of Microsoft's .NET system
$squashedtext = "" if $squashedtext eq ".net";
$squashedtext = "" if $squashedtext =~ /(^|\b)(ado|asp)\.net($|\b)/;
$squashedtext =~ s/\%a0//g;
$squashedtext =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
$squashedtext =~ s/\\/\//g; # Change \ to / as many browsers do this
$squashedtext =~ s/^\[\d*\]//; # Removing leading [numbers]
$squashedtext =~ tr/\n/ /; # Join multiple lines onto 1 line
$squashedtext =~ s/(\<\/?[a-z][a-z0-9:._-]*((\s+[a-z][a-z0-9:._-]*(\s*=\s*(?:\".*?\"|\'.*?\'|[^\'\">\s]+))?)+\s*|\s*)\/?\>)*//ig; # Remove tags, better re from snifer_@hotmail.com
# Do not remove non-domain text sequence as we add blacklist/whitelist back in MailPlus Server #2402
$squashedtext =~ s/\s+//g; # Remove any whitespace
$squashedtext =~ s/^[^\/:]+\@//; # Remove username of email addresses
$squashedtext =~ s/^.*(\<\;|\<)((https?|ftp|mailto|webcal):.+?)(\>\;|\>).*$/$2/i; # Turn blah-blah <http://link.here> blah-blah into "http://link.here"
$squashedtext =~ s/^\<\;//g; # Remove leading <
$squashedtext =~ s/\>\;$//g; # Remove trailing >
$squashedtext =~ s/\<\;/\</g; # Remove things like < and >
$squashedtext =~ s/\>\;/\>/g; # Remove things like < and >
$squashedtext =~ s/\ \;//g; # Remove fixed spaces
$squashedtext =~ s/^(http:\/\/[^:]+):80(\D|$)/$1$2/i; # Remove http:...:80
$squashedtext =~ s/^(https:\/\/[^:]+):443(\D|$)/$1$2/i; # Remove https:...:443
$squashedtext = StringToIntnl($squashedtext); # s/./CharToIntnl("$&")/ge;
$squashedtext = '' if !defined($squashedtext); # StringToIntnl() may return undefined if input is empty string
# If it looks like a link, remove any leading https:// or ftp://
($linkurl,$alarm) = CleanLinkURL($DisarmLinkURL, $DisarmBaseURL);
# Has it fallen foul of the numeric-ip phishing net? Must treat x
# like a digit so it catches 0x41 (= 'A')
$numbertrap = ($linkurl !~ /[<>g-wyz]+/) ? 1 : 0;
if ($alarm ||
$squashedtext =~ /^(w+|ft+p|fpt+|ma[il]+to)([.,]|\%2e)/i ||
$squashedtext =~ /[.,](com|org|net|info|biz|ws)/i ||
$squashedtext =~ /[.,]com?[.,][a-z][a-z]/i ||
$squashedtext =~ /^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/i ||
$numbertrap) {
$squashedtext =~ s/^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/$3/i;
$squashedtext =~ s/^.*?-http:\/\///; # 20080206 Delete common pre-pended text
$squashedtext =~ s/\/.*$//; # Only compare the hostnames
$squashedtext =~ s/[,.]+$//; # Allow trailing dots and commas
$squashedtext = 'www.' . $squashedtext
unless $squashedtext =~ /^ww+|ft+p|fpt+|mailto|webcal/ || $numbertrap;
# If we have already tagged this link as a phishing attack, spot the
# warning text we inserted last time and don't tag it again.
my $squashedpossible = lc($PhishingWarningText);
my $squashedsearch = lc($DisarmLinkText);
$squashedpossible =~ s/\s//g;
$squashedpossible =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
$squashedsearch =~ s/\s//g;
$squashedsearch =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
$squashedpossible = quotemeta($squashedpossible);
if ($squashedtext =~ /$squashedpossible/) {
return '';
}
# Ignore linkurl in whitelist
if (InPhishingWhitelist($linkurl)) {
return '';
}
#
# Known Dangerous Sites List code here
#
if (InPhishingBlacklist($linkurl)) {
return $linkurl;
}
#
# Strict Phishing Net Goes Here
#
if ($alarm ||
($linkurl ne "" && $squashedtext !~ /^(w+\.)?\Q$linkurl\E\/?$/)
|| ($linkurl ne "" && $numbertrap)) {
if ($linkurl eq "" || !$numbertrap || $linkurl ne $squashedtext) {
return $linkurl;
}
}
}
return '';
}
sub ReadPhishingBlacklistWhitelist {
%PhishingWhitelist = ReadPhishingWhitelist($MailPlusServer::PhishingLinkChecker::PhishingWhitelist);
%PhishingBlacklist = ReadPhishingBlacklist($MailPlusServer::PhishingLinkChecker::PhishingBlacklist);
}
# Code below is from MailScanner Config.pm
# Read the list of hostnames to be ignored when doing phishing tests.
# Pass in the filename. Return the hash.
#
# In the code for this, it's a direct copy of the ReadPhishingWhitelist() sub,
# so the white and black names are reversed from what would seem logical.
sub ReadPhishingBlacklist {
my($filename) = @_;
my($fname, $fh, %whitelist, @blacklist, $counter);
%whitelist = ();
# Skip this if they have findphishing = no
# return if MailScanner::Config::IsSimpleValue('findphishing') &&
# !MailScanner::Config::Value('findphishing');
$filename =~ s/^\s*//g;
$filename =~ s/\s*$//g;
return () unless $filename;
$counter = 0;
foreach $fname (split(" ", $filename)) {
next unless $fname;
$fh = new FileHandle;
unless (open($fh, "<$fname")) {
MailPlusServer::Log::WarnLog("Could not read phishing blacklist file %s", $fname);
next;
}
while(<$fh>) {
chomp;
s/^#.*$//; # Remove comments
s/^\s*//g; # Remove leading white space
s/\s*$//g; # Remove trailing white space
s/\s+.*$//g; # Leave only the 1st word
next if /^$/;
# Entries in the list starting with "REMOVE " in capitals cause the entry
# to be forcibly removed from the phishing whitelist.
if (/^REMOVE\s+(\S+)/i) {
delete $whitelist{$1};
push @blacklist, $1;
} else {
$whitelist{$_} = 1;
$counter++;
}
}
# Now process the blacklist
foreach (@blacklist) {
delete $whitelist{$_};
}
close $fh;
}
MailPlusServer::Log::InfoLog("Read %d hostnames from the phishing blacklists", $counter);
return %whitelist;
}
# Read the list of hostnames to be ignored when doing phishing tests.
# Pass in the filename. Return the hash.
sub ReadPhishingWhitelist {
my($filename) = @_;
my($fname, $fh, %whitelist, @blacklist, $counter);
%whitelist = ();
# Skip this if they have findphishing = no
# return if MailScanner::Config::IsSimpleValue('findphishing') &&
# !MailScanner::Config::Value('findphishing');
$filename =~ s/^\s*//g;
$filename =~ s/\s*$//g;
return () unless $filename;
$counter = 0;
foreach $fname (split(" ", $filename)) {
$fh = new FileHandle;
unless (open($fh, "<$fname")) {
MailPlusServer::Log::WarnLog("Could not read phishing whitelist file %s", $fname);
next;
}
while(<$fh>) {
chomp;
s/^#.*$//; # Remove comments
s/^\s*//g; # Remove leading white space
s/\s*$//g; # Remove trailing white space
s/\s+.*$//g; # Leave only the 1st word
next if /^$/;
# Entries in the list starting with "REMOVE " in capitals cause the entry
# to be forcibly removed from the phishing whitelist.
if (/^REMOVE\s+(\S+)/i) {
delete $whitelist{$1};
push @blacklist, $1;
} else {
$whitelist{$_} = 1;
$counter++;
}
}
# Now process the blacklist
foreach (@blacklist) {
delete $whitelist{$_};
}
close $fh;
}
MailPlusServer::Log::InfoLog("Read %d hostnames from the phishing whitelist", $counter);
return %whitelist;
}
# Return 1 if the hostname in $linkurl is in the safe sites file.
# Return 0 otherwise.
sub InPhishingWhitelist {
my($linkurl) = @_;
# Quick lookup
return 1 if $MailPlusServer::PhishingLinkChecker::PhishingWhitelist{$linkurl};
# Trim host. off the front of the hostname
# This is needed to process wildcards in the whitelist
while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
# And replace it with *. then look it up
#print STDERR "Looking up *.$linkurl\n";
return 1 if $MailPlusServer::PhishingLinkChecker::PhishingWhitelist{'*.' . $linkurl};
}
return 0;
}
# Return 1 if the hostname in $linkurl is in the bad sites file.
sub InPhishingBlacklist {
my($linkurl) = @_;
# Quick lookup
return 1 if $MailPlusServer::PhishingLinkChecker::PhishingBlacklist{$linkurl};
# Trim host. off the front of the hostname
# This is needed to process wildcards in the whitelist
while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
# And replace it with *. then look it up
#print STDERR "Looking up *.$linkurl\n";
return 1 if $MailPlusServer::PhishingLinkChecker::PhishingBlacklist{'*.' . $linkurl};
}
return 0;
}
1;