HEX
Server: Apache/2.2.34 (Unix) mod_fastcgi/mod_fastcgi-SNAP-0910052141
System: Linux Kou-Etsu-Dou 4.4.59+ #25556 SMP PREEMPT Thu Mar 4 18:03:46 CST 2021 x86_64
User: hosam (1026)
PHP: 7.2.29
Disabled: NONE
Upload Files
File: /volume1/@appstore/MailPlus-Server/lib/MIMEDefang/MailPlusServer/PhishingLinkChecker.pm
package MailPlusServer::PhishingLinkChecker;

use strict;
use warnings;

use MailPlusServer::Log;
use MailPlusServer::Util;

# Phishing blacklist/whitelist
$MailPlusServer::PhishingLinkChecker::PhishingBlacklist = '/var/packages/MailPlus-Server/target/etc/mimedefang/phishing.bad.sites.conf';
$MailPlusServer::PhishingLinkChecker::PhishingWhitelist = '/var/packages/MailPlus-Server/target/etc/mimedefang/phishing.safe.sites.conf';

use vars qw(%PhishingWhitelist); # Whitelist of hostnames for Phishing Net
use vars qw(%PhishingBlacklist); # Blacklist of hostnames for Phishing Net

my %CharToInternational = (
	160,'nbsp',
	161,'iexcl',
	162,'cent',
	163,'pound',
	164,'curren',
	165,'yen',
	166,'brvbar',
	167,'sect',
	168,'uml',
	169,'copy',
	170,'ordf',
	171,'laquo',
	172,'not',
	173,'shy',
	174,'reg',
	175,'macr',
	176,'deg',
	177,'plusmn',
	178,'sup2',
	179,'sup3',
	180,'acute',
	181,'micro',
	182,'para',
	183,'middot',
	184,'cedil',
	185,'sup1',
	186,'ordm',
	187,'raquo',
	188,'frac14',
	189,'frac12',
	190,'frac34',
	191,'iquest',
	192,'Agrave',
	193,'Aacute',
	194,'Acirc',
	195,'Atilde',
	196,'Auml',
	197,'Aring',
	198,'AElig',
	199,'Ccedil',
	200,'Egrave',
	201,'Eacute',
	202,'Ecirc',
	203,'Euml',
	204,'Igrave',
	205,'Iacute',
	206,'Icirc',
	207,'Iuml',
	208,'ETH',
	209,'Ntilde',
	210,'Ograve',
	211,'Oacute',
	212,'Ocirc',
	213,'Otilde',
	214,'Ouml',
	215,'times',
	216,'Oslash',
	217,'Ugrave',
	218,'Uacute',
	219,'Ucirc',
	220,'Uuml',
	221,'Yacute',
	222,'THORN',
	223,'szlig',
	224,'agrave',
	225,'aacute',
	226,'acirc',
	227,'atilde',
	228,'auml',
	229,'aring',
	230,'aelig',
	231,'ccedil',
	232,'egrave',
	233,'eacute',
	234,'ecirc',
	235,'euml',
	236,'igrave',
	237,'iacute',
	238,'icirc',
	239,'iuml',
	240,'eth',
	241,'ntilde',
	242,'ograve',
	243,'oacute',
	244,'ocirc',
	245,'otilde',
	246,'ouml',
	247,'divide',
	248,'oslash',
	249,'ugrave',
	250,'uacute',
	251,'ucirc',
	252,'uuml',
	253,'yacute',
	254,'thorn',
	255,'yuml'
);

# Turn any character into an international version of it if it is in the range
# 160 to 255.
sub CharToIntnl {
	my $p = shift @_;
	# Passed in an 8-bit character.
	#print STDERR "Char in is $p\n";
	($a) = unpack 'C', $p;

	#print STDERR "Char is $a, $p\n";

	# Bash char 160 (space) to nothing
	return '' if $a == 160;
	my $char = $CharToInternational{$a};
	return '&' . $char . ';' if $char ne "";
	return $p;
}

# Like CharToIntnl but does entire string
# Return undefined if input is empty string, due to unpacking with empty string
sub StringToIntnl {
	my $original = shift;

	# Much faster char conversion for whole strings
	my(@newlinkurl, $newlinkurl, $char);
	@newlinkurl = unpack("C*", $original); # Get an array of characters
	foreach (@newlinkurl) {
		next if $_ == 160;
		$char = $CharToInternational{$_};
		if (defined $char) {
			$newlinkurl .= '&' . $char . ';';
		} else {
			$newlinkurl .= chr($_);
		}
	}
	return $newlinkurl;
	#$linkurl = $newlinkurl unless $newlinkurl eq "";
	#$linkurl =~ s/./CharToIntnl("$&")/ge; -- Old slow version
}


# Clean up a link URL so it is suitable for phishing detection
# Return (clean url, alarm trigger value). An alarm trigger value non-zero
# means this is definitely likely to be a phishing trap, no matter what
# anything else says.
sub CleanLinkURL {
	my($DisarmLinkURL, $DisarmBaseURL) = @_;

	use bytes;

	my($linkurl,$alarm);
	$alarm = 0;

	$linkurl = $DisarmLinkURL;
	$linkurl = lc($linkurl);
	$linkurl =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
	$linkurl = StringToIntnl($linkurl);
	return ("",0) unless $linkurl =~ /[.\/]/; # Ignore if it is not a website at all

	$linkurl =~ s/\s+//g; # Remove any whitespace
	$linkurl =~ s/\\/\//g; # Change \ to / as many browsers do this
	return ("",0) if $linkurl =~ /\@/ && $linkurl !~ /\//; # Ignore emails

	$linkurl =~ s/[,.]+$//; # Remove trailing dots, but also commas while at it
	$linkurl =~ s/^\[\d*\]//; # Remove leading [numbers]
	$linkurl =~ s/^blocked[:\/]+//i; # Remove "blocked::" labels
	$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 2
	$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 3
	$linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 4
	$linkurl =~ s/^outbind:\/\/\d+\//http:\/\//i; # Remove "outbind://22/" type labels

	$linkurl = $DisarmBaseURL . '/' . $linkurl
		if $linkurl ne "" && $DisarmBaseURL ne "" &&
			$linkurl !~ /^(https?|ftp|mailto|webcal):/i;

	$linkurl =~ s/^(https?:\/\/[^:]+):80($|\D)/$1/i; # Remove http://....:80
	$linkurl =~ s/^(https?|ftp|webcal)[:;]\/\///i;
	return ("",0) if $linkurl =~ /^ma[il]+to[:;]/i;

	$linkurl =~ s/[?\/].*$//; # Only compare up to the first '/' or '?'
	$linkurl =~ s/(\<\/?(br|p|ul)\>)*$//ig; # Remove trailing br, p, ul tags
	return ("",0) if $linkurl =~ /^file:/i; # Ignore file: URLs completely

	return ("",0) if $linkurl =~ /^#/; # Ignore internal links completely

	$linkurl =~ s/\/$//; # LinkURL is trimmed -- note
	$alarm = 1 if $linkurl =~ s/[\x00-\x1f[:^ascii:]]/_BAD_/g; # /\&\#/;
	$linkurl = 'JavaScript' if $linkurl =~ /^javascript:/i;
	($linkurl, $alarm);
}

sub FetchPhishingLink {
	my($DisarmLinkText, $DisarmLinkURL, $DisarmBaseURL, $PhishingWarningText) = @_;
	my($squashedtext,$linkurl,$alarm,$numbertrap);

	$squashedtext = lc($DisarmLinkText);
	return '' if $squashedtext eq '';

	# Try to filter out mentions of Microsoft's .NET system
	$squashedtext = "" if $squashedtext eq ".net";
	$squashedtext = "" if $squashedtext =~ /(^|\b)(ado|asp)\.net($|\b)/;

	$squashedtext =~ s/\%a0//g;
	$squashedtext =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
	$squashedtext =~ s/\\/\//g; # Change \ to / as many browsers do this
	$squashedtext =~ s/^\[\d*\]//; # Removing leading [numbers]
	$squashedtext =~ tr/\n/ /; # Join multiple lines onto 1 line
	$squashedtext =~ s/(\<\/?[a-z][a-z0-9:._-]*((\s+[a-z][a-z0-9:._-]*(\s*=\s*(?:\".*?\"|\'.*?\'|[^\'\">\s]+))?)+\s*|\s*)\/?\>)*//ig; # Remove tags, better re from snifer_@hotmail.com

	# Do not remove non-domain text sequence as we add blacklist/whitelist back in MailPlus Server #2402

	$squashedtext =~ s/\s+//g; # Remove any whitespace
	$squashedtext =~ s/^[^\/:]+\@//; # Remove username of email addresses
	$squashedtext =~ s/^.*(\&lt\;|\<)((https?|ftp|mailto|webcal):.+?)(\&gt\;|\>).*$/$2/i; # Turn blah-blah <http://link.here> blah-blah into "http://link.here"
	$squashedtext =~ s/^\&lt\;//g; # Remove leading &lt;
	$squashedtext =~ s/\&gt\;$//g; # Remove trailing &gt;
	$squashedtext =~ s/\&lt\;/\</g; # Remove things like &lt; and &gt;
	$squashedtext =~ s/\&gt\;/\>/g; # Remove things like &lt; and &gt;
	$squashedtext =~ s/\&nbsp\;//g; # Remove fixed spaces
	$squashedtext =~ s/^(http:\/\/[^:]+):80(\D|$)/$1$2/i; # Remove http:...:80
	$squashedtext =~ s/^(https:\/\/[^:]+):443(\D|$)/$1$2/i; # Remove https:...:443
	$squashedtext =  StringToIntnl($squashedtext); # s/./CharToIntnl("$&")/ge;
	$squashedtext =  '' if !defined($squashedtext); # StringToIntnl() may return undefined if input is empty string
	# If it looks like a link, remove any leading https:// or ftp://
	($linkurl,$alarm) = CleanLinkURL($DisarmLinkURL, $DisarmBaseURL);

	# Has it fallen foul of the numeric-ip phishing net? Must treat x
	# like a digit so it catches 0x41 (= 'A')
	$numbertrap = ($linkurl !~ /[<>g-wyz]+/) ? 1 : 0;

	if ($alarm ||
		$squashedtext =~ /^(w+|ft+p|fpt+|ma[il]+to)([.,]|\%2e)/i ||
		$squashedtext =~ /[.,](com|org|net|info|biz|ws)/i ||
		$squashedtext =~ /[.,]com?[.,][a-z][a-z]/i ||
		$squashedtext =~ /^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/i ||
		$numbertrap) {

		$squashedtext =~  s/^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/$3/i;
		$squashedtext =~ s/^.*?-http:\/\///; # 20080206 Delete common pre-pended text
		$squashedtext =~ s/\/.*$//; # Only compare the hostnames
		$squashedtext =~ s/[,.]+$//; # Allow trailing dots and commas
		$squashedtext = 'www.' . $squashedtext
		unless $squashedtext =~ /^ww+|ft+p|fpt+|mailto|webcal/ || $numbertrap;

		# If we have already tagged this link as a phishing attack, spot the
		# warning text we inserted last time and don't tag it again.
		my $squashedpossible = lc($PhishingWarningText);
		my $squashedsearch   = lc($DisarmLinkText);
		$squashedpossible =~ s/\s//g;
		$squashedpossible =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
		$squashedsearch   =~ s/\s//g;
		$squashedsearch   =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
		$squashedpossible = quotemeta($squashedpossible);
		if ($squashedtext =~ /$squashedpossible/) {
			return '';
		}

		# Ignore linkurl in whitelist
		if (InPhishingWhitelist($linkurl)) {
			return '';
		}

		#
		# Known Dangerous Sites List code here
		#
		if (InPhishingBlacklist($linkurl)) {
			return $linkurl;
		}

		#
		# Strict Phishing Net Goes Here
		#
		if ($alarm ||
			($linkurl ne "" && $squashedtext !~ /^(w+\.)?\Q$linkurl\E\/?$/)
			|| ($linkurl ne "" && $numbertrap)) {
			if ($linkurl eq "" || !$numbertrap || $linkurl ne $squashedtext) {
				return $linkurl;
			}
		}
	}

	return '';
}

sub ReadPhishingBlacklistWhitelist {
	%PhishingWhitelist = ReadPhishingWhitelist($MailPlusServer::PhishingLinkChecker::PhishingWhitelist);
	%PhishingBlacklist = ReadPhishingBlacklist($MailPlusServer::PhishingLinkChecker::PhishingBlacklist);
}

# Code below is from MailScanner Config.pm

# Read the list of hostnames to be ignored when doing phishing tests.
# Pass in the filename. Return the hash.
#
# In the code for this, it's a direct copy of the ReadPhishingWhitelist() sub,
# so the white and black names are reversed from what would seem logical.
sub ReadPhishingBlacklist {
	my($filename) = @_;

	my($fname, $fh, %whitelist, @blacklist, $counter);

	%whitelist = ();

	# Skip this if they have findphishing = no
	# return if MailScanner::Config::IsSimpleValue('findphishing') &&
	#           !MailScanner::Config::Value('findphishing');

	$filename =~ s/^\s*//g;
	$filename =~ s/\s*$//g;
	return () unless $filename;

	$counter = 0;
	foreach $fname (split(" ", $filename)) {
		next unless $fname;
		$fh = new FileHandle;
		unless (open($fh, "<$fname")) {
			MailPlusServer::Log::WarnLog("Could not read phishing blacklist file %s", $fname);
			next;
		}

		while(<$fh>) {
			chomp;
			s/^#.*$//;   # Remove comments
			s/^\s*//g;   # Remove leading white space
			s/\s*$//g;   # Remove trailing white space
			s/\s+.*$//g; # Leave only the 1st word
			next if /^$/;
			# Entries in the list starting with "REMOVE " in capitals cause the entry
			# to be forcibly removed from the phishing whitelist.
			if (/^REMOVE\s+(\S+)/i) {
				delete $whitelist{$1};
				push @blacklist, $1;
			} else {
				$whitelist{$_} = 1;
				$counter++;
			}
		}

		# Now process the blacklist
		foreach (@blacklist) {
			delete $whitelist{$_};
		}

		close $fh;
	}
	MailPlusServer::Log::InfoLog("Read %d hostnames from the phishing blacklists", $counter);

	return %whitelist;
}


# Read the list of hostnames to be ignored when doing phishing tests.
# Pass in the filename. Return the hash.
sub ReadPhishingWhitelist {
	my($filename) = @_;

	my($fname, $fh, %whitelist, @blacklist, $counter);

	%whitelist = ();

	# Skip this if they have findphishing = no
	# return if MailScanner::Config::IsSimpleValue('findphishing') &&
	#           !MailScanner::Config::Value('findphishing');

	$filename =~ s/^\s*//g;
	$filename =~ s/\s*$//g;
	return () unless $filename;

	$counter = 0;
	foreach $fname (split(" ", $filename)) {
		$fh = new FileHandle;
		unless (open($fh, "<$fname")) {
			MailPlusServer::Log::WarnLog("Could not read phishing whitelist file %s", $fname);
			next;
		}

		while(<$fh>) {
			chomp;
			s/^#.*$//;   # Remove comments
			s/^\s*//g;   # Remove leading white space
			s/\s*$//g;   # Remove trailing white space
			s/\s+.*$//g; # Leave only the 1st word
			next if /^$/;
			# Entries in the list starting with "REMOVE " in capitals cause the entry
			# to be forcibly removed from the phishing whitelist.
			if (/^REMOVE\s+(\S+)/i) {
				delete $whitelist{$1};
				push @blacklist, $1;
			} else {
				$whitelist{$_} = 1;
				$counter++;
			}
		}

		# Now process the blacklist
		foreach (@blacklist) {
			delete $whitelist{$_};
		}

		close $fh;
	}
	MailPlusServer::Log::InfoLog("Read %d hostnames from the phishing whitelist", $counter);

	return %whitelist;
}

# Return 1 if the hostname in $linkurl is in the safe sites file.
# Return 0 otherwise.
sub InPhishingWhitelist {
	my($linkurl) = @_;

	# Quick lookup
	return 1 if $MailPlusServer::PhishingLinkChecker::PhishingWhitelist{$linkurl};

	# Trim host. off the front of the hostname
	# This is needed to process wildcards in the whitelist
	while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
		# And replace it with *. then look it up
		#print STDERR "Looking up *.$linkurl\n";
		return 1 if $MailPlusServer::PhishingLinkChecker::PhishingWhitelist{'*.' . $linkurl};
	}

	return 0;
}

# Return 1 if the hostname in $linkurl is in the bad sites file.
sub InPhishingBlacklist {
	my($linkurl) = @_;

	# Quick lookup
	return 1 if $MailPlusServer::PhishingLinkChecker::PhishingBlacklist{$linkurl};

	# Trim host. off the front of the hostname
	# This is needed to process wildcards in the whitelist
	while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
		# And replace it with *. then look it up
		#print STDERR "Looking up *.$linkurl\n";
		return 1 if $MailPlusServer::PhishingLinkChecker::PhishingBlacklist{'*.' . $linkurl};
	}

	return 0;
}

1;