<?php

require_once dirname(__FILE__) . "/base.php";
require_once dirname(__FILE__) . "/log.php";
require_once dirname(__FILE__) . "/mime.php";

# Context markers.  A marker is appended to a word to form the key under which the word is
# stored and looked up, so the same word is tracked separately per context.
# ac_spam_email() scores bodies with AC_SPAM_BODY and subjects with AC_SPAM_SUBJECT;
# AC_SPAM_META is not referenced by the functions in this file.
define("AC_SPAM_BODY", "b");
define("AC_SPAM_SUBJECT", "s");
define("AC_SPAM_META", "m");

# Punctuation characters that ac_spam_words() replaces with a space before splitting a
# message into individual words.
$ac_spam_delims = array(
	'`', '~', '!', '@', '#',
	'^', '&', '*', '(', ')',
	'-', '_', '=', '+', '\\',
	'|', ']', '}', '[', '{',
	'\'', '"', ';', ':', '/',
	'?', '.', '>', ',', '<',
);

# Very common words that are meant to be left out of scoring; consulted by
# ac_spam_probability().  The words are the VALUES of a numerically indexed list.  Most
# entries appear twice, once in lowercase and once capitalised ("nbsp" is lowercase only).
$ac_spam_ignore = array(
	"a",
	"and",
	"are",
	"as",
	"at",
	"be",
	"by",
	"can",
	"have",
	"i",
	"in",
	"is",
	"nbsp",
	"of",
	"on",
	"or",
	"our",
	"that",
	"the",
	"this",
	"to",
	"will",
	"your",
	"A",
	"And",
	"Are",
	"As",
	"At",
	"Be",
	"By",
	"Can",
	"Have",
	"I",
	"In",
	"Is",
	"Of",
	"On",
	"Or",
	"Our",
	"That",
	"The",
	"This",
	"To",
	"Will",
	"Your",
);

# In-memory copies of rows read from #spam_s (spam) and #spam_h (ham), keyed by
# word . context with the hit count as the value.  Filled by the ac_spam_prefetch_*()
# functions.
$ac_spam_dbs = array();
$ac_spam_dbh = array();

# Pending hit-count increments, keyed by word . context.  The ac_spam_learn_*() functions
# accumulate into $ac_spam_ups / $ac_spam_uph and the ac_spam_update_*() functions write them
# out.  $ac_spam_ins / $ac_spam_inh are not referenced by the functions in this file.
$ac_spam_ins = array();
$ac_spam_inh = array();
$ac_spam_ups = array();
$ac_spam_uph = array();

# Number of spam / ham messages seen; refreshed by ac_spam_cache() and used as divisors when
# turning hit counts into frequencies, so they start at 1 rather than 0.
$ac_spam_ns = 1;
$ac_spam_nh = 1;

function ac_spam_cache() {
	# Load the spam and ham message totals (the 'countm' rows) into the globals used as
	# divisors.  Usually run once after connecting; run it again when the totals may have
	# changed behind our back on a re-used connection.  Each total is floored at 1.
	$queries = array(
		'ac_spam_ns' => "SELECT hits FROM #spam_s WHERE word = 'countm'",
		'ac_spam_nh' => "SELECT hits FROM #spam_h WHERE word = 'countm'",
	);

	foreach ($queries as $global => $sql)
		$GLOBALS[$global] = max((int)ac_sql_select_one($sql), 1);
}

function ac_spam_upcount_s() {
	# One more spam message seen: bump the in-memory total, then the stored 'countm' row.
	$GLOBALS['ac_spam_ns']++;

	$sql = "UPDATE #spam_s SET hits = hits + 1 WHERE word = 'countm'";
	ac_sql_query($sql);
}

function ac_spam_upcount_h() {
	# One more ham message seen: bump the in-memory total, then the stored 'countm' row.
	$GLOBALS['ac_spam_nh']++;

	$sql = "UPDATE #spam_h SET hits = hits + 1 WHERE word = 'countm'";
	ac_sql_query($sql);
}

function ac_spam_downcount_s() {
	# One spam message fewer.  The in-memory total is used as a divisor by ac_spam_word_s(),
	# so it is floored at 1 -- the same floor ac_spam_cache() applies -- instead of being
	# allowed to reach zero or go negative.  The stored row is only decremented while it is
	# still positive.
	$current = isset($GLOBALS["ac_spam_ns"]) ? (int)$GLOBALS["ac_spam_ns"] : 1;
	$GLOBALS["ac_spam_ns"] = max($current - 1, 1);

	ac_sql_query("UPDATE #spam_s SET hits = hits - 1 WHERE word = 'countm' AND hits > 0");
}

function ac_spam_downcount_h() {
	# One ham message fewer.  The in-memory total is used as a divisor by ac_spam_word_h(),
	# so it is floored at 1 -- the same floor ac_spam_cache() applies -- instead of being
	# allowed to reach zero or go negative.  The stored row is only decremented while it is
	# still positive.
	$current = isset($GLOBALS["ac_spam_nh"]) ? (int)$GLOBALS["ac_spam_nh"] : 1;
	$GLOBALS["ac_spam_nh"] = max($current - 1, 1);

	ac_sql_query("UPDATE #spam_h SET hits = hits - 1 WHERE word = 'countm' AND hits > 0");
}

function ac_spam_record_s($context, $word) {
	# Return the cached spam hit count for $word in $context, or false when the word is not
	# in the in-memory cache.
	global $ac_spam_dbs;

	$key = $word . $context;

	return isset($ac_spam_dbs[$key]) ? $ac_spam_dbs[$key] : false;
}

function ac_spam_record_h($context, $word) {
	# Return the cached ham hit count for $word in $context, or false when the word is not
	# in the in-memory cache.
	global $ac_spam_dbh;

	$key = $word . $context;

	return isset($ac_spam_dbh[$key]) ? $ac_spam_dbh[$key] : false;
}

function ac_spam_learn_s($context, $word) {
	# Record one more spam occurrence of $word in $context.
	#
	# A word missing from the in-memory cache is inserted straight away with one hit.  A word
	# that is already cached only has its pending increment bumped; ac_spam_update_s() writes
	# those increments out later.
	global $ac_spam_dbs;
	global $ac_spam_ups;

	$key = $word . $context;

	if (ac_spam_record_s($context, $word) === false) {
		# Escape the key the same way the unlearn functions do; callers may pass words that
		# did not come out of ac_spam_words().
		$combo = ac_sql_escape($key);

		ac_sql_query("
			INSERT INTO #spam_s (word, hits) VALUES ('{$combo}', '1')
		");

		# Remember the new row so that a repeat of the same word takes the increment path
		# below instead of issuing a second INSERT.
		$ac_spam_dbs[$key] = 1;
		return;
	}

	if (!isset($ac_spam_ups[$key]))
		$ac_spam_ups[$key] = 1;
	else
		$ac_spam_ups[$key]++;
}

function ac_spam_learn_h($context, $word) {
	# Record one more ham occurrence of $word in $context.
	#
	# A word missing from the in-memory cache is inserted straight away with one hit.  A word
	# that is already cached only has its pending increment bumped; ac_spam_update_h() writes
	# those increments out later.
	global $ac_spam_dbh;
	global $ac_spam_uph;

	$key = $word . $context;

	if (ac_spam_record_h($context, $word) === false) {
		# Escape the key the same way the unlearn functions do; callers may pass words that
		# did not come out of ac_spam_words().
		$combo = ac_sql_escape($key);

		ac_sql_query("
			INSERT INTO #spam_h (word, hits) VALUES ('{$combo}', '1')
		");

		# Remember the new row so that a repeat of the same word takes the increment path
		# below instead of issuing a second INSERT.
		$ac_spam_dbh[$key] = 1;
		return;
	}

	if (!isset($ac_spam_uph[$key]))
		$ac_spam_uph[$key] = 1;
	else
		$ac_spam_uph[$key]++;
}

function ac_spam_unlearn_s($context, $word) {
	# Take back one spam occurrence of $word in $context and count it as ham instead.
	# Words that are not in the in-memory spam cache are left alone.
	$hits    = ac_spam_record_s($context, $word);
	$escaped = ac_sql_escape($word . $context);

	if ($hits === false)
		return;

	# The last remaining hit removes the row; otherwise the count is lowered by one.
	$sql = ($hits == 1)
		? "DELETE FROM #spam_s WHERE word = '$escaped'"
		: "UPDATE #spam_s SET hits = hits - 1 WHERE word = '$escaped'";

	ac_sql_query($sql);
	ac_spam_learn_h($context, $word);
}

function ac_spam_unlearn_h($context, $word) {
	# Take back one ham occurrence of $word in $context and count it as spam instead.
	# Words that are not in the in-memory ham cache are left alone.
	$hits    = ac_spam_record_h($context, $word);
	$escaped = ac_sql_escape($word . $context);

	if ($hits === false)
		return;

	# The last remaining hit removes the row; otherwise the count is lowered by one.
	$sql = ($hits == 1)
		? "DELETE FROM #spam_h WHERE word = '$escaped'"
		: "UPDATE #spam_h SET hits = hits - 1 WHERE word = '$escaped'";

	ac_sql_query($sql);
	ac_spam_learn_s($context, $word);
}

function ac_spam_update_s() {
	# Write the pending spam hit-count increments gathered by ac_spam_learn_s() to #spam_s,
	# then clear the pending list.  Keys are disabled around the batch of updates and
	# re-enabled afterwards.
	global $ac_spam_ups;
	ac_sql_query("ALTER TABLE #spam_s DISABLE KEYS");
	foreach ($ac_spam_ups as $word => $hits) {
		$word = ac_sql_escape($word);
		$hits = (int)$hits;
		# BINARY makes the match case-sensitive.
		ac_sql_query("UPDATE #spam_s SET hits = hits + $hits WHERE BINARY word = '$word'");
	}
	ac_sql_query("ALTER TABLE #spam_s ENABLE KEYS");

	$ac_spam_ups = array();
}

function ac_spam_update_h() {
	# Write the pending ham hit-count increments gathered by ac_spam_learn_h() to #spam_h,
	# then clear the pending list.  Keys are disabled around the batch of updates and
	# re-enabled afterwards.
	global $ac_spam_uph;

	ac_sql_query("ALTER TABLE #spam_h DISABLE KEYS");

	foreach ($ac_spam_uph as $key => $delta) {
		$escaped = ac_sql_escape($key);
		$amount  = (int)$delta;
		# BINARY makes the match case-sensitive.
		ac_sql_query(sprintf(
			"UPDATE #spam_h SET hits = hits + %d WHERE BINARY word = '%s'",
			$amount,
			$escaped
		));
	}

	ac_sql_query("ALTER TABLE #spam_h ENABLE KEYS");

	$ac_spam_uph = array();
}

function ac_spam_prefetch_s($context, $words) {
	# Load the spam hit counts for $words (an array of words, or a message string that is
	# split with ac_spam_words()) into the in-memory cache, 20 words per query.  Words that
	# are already cached are not requested again.
	global $ac_spam_dbs;

	if (is_string($words))
		$words = ac_spam_words($words);

	foreach (array_chunk($words, 20) as $chunk) {
		# Build the lookup list in a fresh array.  Removing entries from the array being
		# walked by an index loop bounded by count() would end the loop early and leave the
		# trailing words without their context suffix and without escaping.
		$lookup = array();

		foreach ($chunk as $word) {
			$key = $word . $context;

			if (!isset($ac_spam_dbs[$key]))
				$lookup[] = ac_sql_escape($key);
		}

		# Everything in this chunk is cached already; nothing to ask the database.
		if (count($lookup) == 0)
			continue;

		$str = implode("','", $lookup);

		$rs  = ac_sql_query("
			SELECT word, hits FROM #spam_s WHERE word IN ('$str')
		");

		while ($row = ac_sql_fetch_assoc($rs))
			$ac_spam_dbs[$row["word"]] = $row["hits"];
	}
}

function ac_spam_prefetch_h($context, $words) {
	# Load the ham hit counts for $words (an array of words, or a message string that is
	# split with ac_spam_words()) into the in-memory cache, 20 words per query.  Words that
	# are already cached are not requested again.
	global $ac_spam_dbh;

	if (is_string($words))
		$words = ac_spam_words($words);

	foreach (array_chunk($words, 20) as $chunk) {
		# Build the lookup list in a fresh array.  Removing entries from the array being
		# walked by an index loop bounded by count() would end the loop early and leave the
		# trailing words without their context suffix and without escaping.
		$lookup = array();

		foreach ($chunk as $word) {
			$key = $word . $context;

			if (!isset($ac_spam_dbh[$key]))
				$lookup[] = ac_sql_escape($key);
		}

		# Everything in this chunk is cached already; nothing to ask the database.
		if (count($lookup) == 0)
			continue;

		$str = implode("','", $lookup);

		$rs  = ac_sql_query("
			SELECT word, hits FROM #spam_h WHERE word IN ('$str')
		");

		while ($row = ac_sql_fetch_assoc($rs))
			$ac_spam_dbh[$row["word"]] = $row["hits"];
	}
}

function ac_spam_word_s($context, $word) {
	# Spam frequency of $word in $context: cached hit count divided by the number of spam
	# messages.  Words that are not cached score 0.0.
	global $ac_spam_dbs, $ac_spam_ns;

	$key = $word . $context;

	if (!isset($ac_spam_dbs[$key]))
		return 0.0;

	return (float)$ac_spam_dbs[$key] / (float)$ac_spam_ns;
}

function ac_spam_word_h($context, $word) {
	# Ham frequency of $word in $context: cached hit count divided by the number of ham
	# messages.  Words that are not cached score 0.0.
	global $ac_spam_dbh, $ac_spam_nh;

	$key = $word . $context;

	if (!isset($ac_spam_dbh[$key]))
		return 0.0;

	return (float)$ac_spam_dbh[$key] / (float)$ac_spam_nh;
}

function ac_spam_message_s($context, $word) {
	# Probability, clamped to 0.01 .. 0.99, that a message containing $word in $context is
	# spam.  Returns the neutral 0.5 for a word with no cached spam or ham hits.
	#
	# What follows is a basic adaptation of Paul Graham's seminal spam algorithm in his essay A
	# Plan for Spam (http://www.paulgraham.com/spam.html):
	#
	#   prob = min(1, b / nbad) / (min(1, g / ngood) + min(1, b / nbad))
	#
	# where b is the word's spam count and g is twice its ham count.  ac_spam_word_s() and
	# ac_spam_word_h() already return the counts divided by the corpus sizes, so the values
	# are used as they are.  Dividing by the corpus sizes a second time would skew the
	# result whenever the spam and ham corpora differ in size.
	$prh      = ac_spam_word_h($context, $word);
	$prs      = ac_spam_word_s($context, $word);

	if ($prh == $prs && $prh == 0.0)
		return 0.5;

	$bad      = min(1.0, $prs);
	# Ham frequency is doubled to bias against false positives, as in the essay.
	$good     = min(1.0, 2.0 * $prh);
	# At least one of the two terms is positive here, so the divisor is never zero.
	$prob     = min(0.99, $bad / ($good + $bad));

	return max(0.01, $prob);
}

function ac_spam_cb_oneminus($x) {
	# Complement of a probability: 1 - x, with x coerced to float first.
	$p = (float)$x;

	return 1.0 - $p;
}

function ac_spam_words($message) {
	# Split $message into words: delimiter characters become spaces, runs of whitespace
	# collapse to a single space, the ends are trimmed and the rest is split on spaces.
	# An empty message yields an array holding one empty string.
	$spaced    = str_replace($GLOBALS['ac_spam_delims'], ' ', $message);
	$collapsed = trim(preg_replace('/\s+/m', ' ', $spaced));

	return explode(" ", $collapsed);
}

function ac_spam_probability($context, $message) {
	# Combined spam probability, clamped to 0.01 .. 0.99, for all words of $message in
	# $context.  Returns 0.01 when no word is left to score.
	global $ac_spam_ignore;

	# The ignore list stores its words as values, so flip it once to get a lookup keyed by
	# word.  Testing isset() against the list itself would look at its numeric indexes.
	$ignored = is_array($ac_spam_ignore) ? array_flip($ac_spam_ignore) : array();

	$words = ac_spam_words($message);
	$probs = array();

	foreach ($words as $w) {
		# If the word is on the ignore list, we assume it's too common to really want to consider
		# it for our spam score.
		if (!isset($ignored[$w]))
			$probs[] = ac_spam_message_s($context, $w);
	}

	if (count($probs) == 0)
		return 0.01;

	# prod  = a * b * c ... N
	# prodp = (1-a) * (1-b) * (1-c) ... (1-N)
	$prod  = 1.0;
	$prodp = 1.0;

	foreach ($probs as $p) {
		$prod  *= $p;
		$prodp *= (1.0 - (float)$p);
	}

	return min(0.99, max(0.01, $prod / max(0.01, $prod + $prodp)));
}

function ac_spam_mark_s($context, $message) {
	# Learn every word of $message as spam in the given context.
	foreach (ac_spam_words($message) as $token)
		ac_spam_learn_s($context, $token);
}

function ac_spam_unmark_s($context, $message) {
	# Unlearn every word of $message as spam in the given context.
	foreach (ac_spam_words($message) as $token)
		ac_spam_unlearn_s($context, $token);
}

function ac_spam_unmark_h($context, $message) {
	# Unlearn every word of $message as ham in the given context.
	foreach (ac_spam_words($message) as $token)
		ac_spam_unlearn_h($context, $token);
}

function ac_spam_mark_h($context, $message) {
	# Learn every word of $message as ham in the given context.
	foreach (ac_spam_words($message) as $token)
		ac_spam_learn_h($context, $token);
}

function ac_spam_email($email) {
	# Score an email object (like what would be produced by ac_mail_extract()) and return a
	# spam probability no greater than 0.99.
	#
	# Order of evaluation:
	#   1. A sender domain on the whitelist returns 0.1, one on the blacklist returns 0.99.
	#   2. Body and subject are scored separately; a present-but-empty subject scores 0.99.
	#   3. Either part scoring 0.99 returns 0.99 outright.
	#   4. Otherwise body (weight 0.7) and subject (weight 0.3, doubled when the weighted
	#      body score is 0.1 or less) are added to the result of ac_spam_points().
	#
	# Side effect: a subject header given as an array is replaced on $email by its elements
	# joined with spaces.

	# First check our whitelist and blacklist.

	if (isset($email->headers["from"])) {
		$from   = ac_mail_extract_recipients($email->headers["from"]);

		foreach ($from as $addr) {
			$tmp = explode("@", $addr);

			if (count($tmp) > 1) {
				$domain = ac_sql_escape($tmp[1]);

				if ((int)ac_sql_select_one("SELECT COUNT(*) FROM #spam_whitelist WHERE domain = '$domain'") > 0)
					return 0.1;

				if ((int)ac_sql_select_one("SELECT COUNT(*) FROM #spam_blacklist WHERE domain = '$domain'") > 0)
					return 0.99;
			}
		}
	}

	# Process an email object like what would be produced by ac_mail_extract().

	$probs = array(
		"body"    => 0.0,
		"subject" => 0.0,
	);

	if (isset($email->body)) {
		$body = ac_str_strip_tags($email->body);
		ac_spam_prefetch_s(AC_SPAM_BODY, $body);
		ac_spam_prefetch_h(AC_SPAM_BODY, $body);
		$probs["body"] = ac_spam_probability(AC_SPAM_BODY, $body);
	}

	if (isset($email->headers["subject"])) {
		if ( $email->headers["subject"] ) {
			if ( is_array($email->headers["subject"]) ) $email->headers["subject"] = implode(' ', $email->headers["subject"]);
			ac_spam_prefetch_s(AC_SPAM_SUBJECT, $email->headers["subject"]);
			ac_spam_prefetch_h(AC_SPAM_SUBJECT, $email->headers["subject"]);
			$probs["subject"] = ac_spam_probability(AC_SPAM_SUBJECT, $email->headers["subject"]);
		} else {
			$probs["subject"] = .99;
		}
	}

	if (isset($GLOBALS["__log_spam"])) {
		# The subject header may be missing altogether, or may be an empty array that was
		# not joined above; log a plain string in every case.
		$subject_text = isset($email->headers["subject"]) ? $email->headers["subject"] : "";

		if (is_array($subject_text))
			$subject_text = implode(' ', $subject_text);

		$ins = array(
			"body"         => $probs["body"],
			"subject"      => $probs["subject"],
			"subject_text" => $subject_text,
			"headers"      => var_export(isset($email->headers) ? $email->headers : null, true),
		);

		ac_sql_insert("#logspam", $ins);
	}

	if ($probs["subject"] == 0.99 || $probs["body"] == 0.99)
		return 0.99;

	$rval  = $probs["body"] * 0.7;

	# A low body score lets the subject count double.
	if ($rval <= 0.1)
		$rval += ($probs["subject"] * 0.3) * 2;
	else
		$rval += ($probs["subject"] * 0.3);

	$rval += ac_spam_points($email);

	return min(0.99, $rval);
}

function ac_spam_points($email) {
	# Extra spam points for structural properties of the message: 0.9 when the body is
	# missing or contains only whitespace, 0.0 otherwise.
	$has_body = isset($email->body) && trim($email->body) !== "";

	return $has_body ? 0.0 : 0.9;
}

?>
