Family - Friendly Word Filter

Written in PHP, the purpose of this simple submission form is to automatically moderate profane or prohibited words from posting to a public interface.

Once the captcha is completed and the text is submitted, a number of tests are run on the submission. The text is checked against blacklists of prohibited words, as well as against a known table of clean dictionary words, to generate an intelligent risk score on the basis of similarity. Feel free to try it out and be creative! And don't worry, you can delete your entry once it has been submitted if you wish.

Keyterms: PHP, MySQL, HTML, CSS

Metrics:

Functionality:

Sanitation:



Effectiveness Ratio: 1 - [Profane Submissions (21) / Submitted (247)]: 91%

Profane Ratio: Blocked Profane (161) / Submitted (247): 65%

ID submission spellcheck min_levenshtein max_similar_text md5
51 salty raisins 0 3 3 0df8a0c68f71a784e72781245f14ffc6 (It's not you)
52 very interesting 0 2 2 91f1d4b22400ec4c436e44053f3ba62f (It's not you)
53 Hello world 1 N/A N/A 9b1f7b4867a425d9da31985725cda6db (It's not you)
54 What up 1 N/A N/A 9b1f7b4867a425d9da31985725cda6db (It's not you)
55 Wat 1 N/A N/A 9b1f7b4867a425d9da31985725cda6db (It's not you)
56 Funny man 0 2 3 9b1f7b4867a425d9da31985725cda6db (It's not you)
61 yes 1 N/A N/A 5e08d3e81eaf229eadd0d960a07a1cd8 (It's not you)
63 ok 1 N/A N/A 5d993933416ccee5e5a68a11c960652d (It's not you)
64 Ilovesophia 0 7 5 ef930ea7357371adba383f3b5904636c (It's not you)



<?php
/*
        index.php
        Builds and populates table with database values
        Znamensky | 2021
*/
// Leave PHP mode here: everything up to the next opening PHP tag is literal HTML.
?>

<!DOCTYPE html>
<html lang = "en-US">
	<head>
		<meta charset = "UTF-8">
		<title>WordList Filter </title>
        <script src='https://www.google.com/recaptcha/api.js' async defer></script>
		<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.5.0/styles/stackoverflow-dark.min.css">
        <link rel="stylesheet" type = "text/css" href="/css/wordfilter.css">
		<script src="/js/highlight.min.js"></script>
		<script>hljs.initHighlightingOnLoad();</script>
		<!--<script src="/js/prism.js"></script>-->
	</head>

<body>


		<h1> Family - Friendly Word Filter </h1>
<p>Written in PHP, the purpose of this simple submission form is to automatically moderate profane or prohibited words from posting to a public interface.</p>


<p>text redacted</p>
<form id = "comment_form" action="insert.php" method="POST">
<label>Try me: </label><input type="text" name="submission">
<input type="submit" name="submit" value="Submit">
<br><br>
<div class="g-recaptcha" data-sitekey=""></div>
</form>

<?php



/**
 * Builds an MD5 digest that identifies the current visitor by IP address.
 *
 * A valid IPv4 address is hashed in the integer form returned by ip2long(),
 * a valid IPv6 address in the packed form returned by inet_pton(); anything
 * else is hashed as the literal string "1.1.1.1".
 *
 * @return string 32-character hexadecimal MD5 digest
 */
function generateHash(){
	$clientAddress = $_SERVER['REMOTE_ADDR'];

	// Fallback input, used when the address is neither valid IPv4 nor valid IPv6.
	$digestInput = "1.1.1.1";

	if (filter_var($clientAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
		$digestInput = ip2long($clientAddress);
	} elseif (filter_var($clientAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
		$digestInput = inet_pton($clientAddress);
	}

	return hash('md5', $digestInput);
}

// Digest of the current visitor's IP address; the table rendering below compares
// it with each row's md5 column to decide whether to offer the DELETE button.
$hash = generateHash();

/**
 * Emits an inline script that shows $message in a JavaScript alert box.
 *
 * The message is encoded as a JSON string literal with <, >, &, ' and "
 * hex-escaped, so quotes or a "</script>" sequence inside the message can
 * neither break the script nor inject markup.
 *
 * @param string $message text to display
 * @return void
 */
function function_alert($message) {
    $flags = JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP;
    $literal = json_encode((string) $message, $flags);

    // json_encode() returns false for input it cannot encode (e.g. invalid UTF-8);
    // fall back to an empty alert rather than emitting broken JavaScript.
    if ($literal === false) {
        $literal = '""';
    }

    echo "<script>alert(" . $literal . ");</script>";
}


        //POPULATES THE TABLE UPON INITIAL LOAD
		try {
			// User name and password are separate constructor arguments; they must
			// not be folded into the DSN string.
			$con = new PDO('mysql:host=localhost;dbname=redacted', 'redacted', 'redacted');
			// Throw on SQL errors so the catch block below actually sees them. In
			// silent mode query() returns false and the following method call is fatal.
			$con->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

			// Fetch all three counters in one query as an id => value map.
			// Per the labels printed below: id 0 = blocked, id 1 = submitted,
			// id 2 = profane submissions.
			$counterQuery = "SELECT id, effectiveness FROM effectiveness WHERE id IN (0, 1, 2)";
			$counters = $con->query($counterQuery)->fetchAll(PDO::FETCH_KEY_PAIR);

			$den1 = isset($counters[0]) ? (int) $counters[0] : 0;
			$den2 = isset($counters[1]) ? (int) $counters[1] : 0;
			$den3 = isset($counters[2]) ? (int) $counters[2] : 0;

			// Guard the divisions: with no submissions yet both ratios are reported as 0.
			$block = $den2 > 0 ? $den1 / $den2 : 0;
			// Follows the formula shown in the label: profane submissions over ALL
			// submissions. NOTE(review): the previous code divided by the blocked
			// count ($den1) instead, which contradicted the label — confirm intent.
			$effectiveness = $den2 > 0 ? $den3 / $den2 : 0;

			$block_rounded = round($block, 2);
			$effectiveness_rounded = 1 - round($effectiveness, 2);

			print "<p><b>Block Ratio:</b> Blocked ($den1) / Submitted ($den2): " . 100*$block_rounded .    "% </p>";

			print "<p><b>Effectiveness Ratio:</b> 1 - [Profane Submissions ($den3) / Submitted ($den2)]: " . 100*$effectiveness_rounded .    "% </p>";

			// A single query provides both the header names and the row data.
			$rows = $con->query("SELECT * FROM food")->fetchAll(PDO::FETCH_ASSOC);

			print "<table>";

			// Column names come from the first row; an empty table gets no header row.
			if (count($rows) > 0){
				print " <tr>";
				foreach (array_keys($rows[0]) as $field){
					print " <th>" . htmlspecialchars((string) $field, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</th>";
				}
				print " </tr>";
			}

			foreach ($rows as $row){
				print " <tr>";
				foreach ($row as $name => $value){
					// Escape on output. double_encode is off because stored submissions
					// may already contain entities such as &#39; from input sanitizing.
					$safeValue = htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

					if ($value and $name === 'md5'){
						if ((string) $value === $hash){
							// The row was submitted from the visitor's own address.
							// NOTE(review): the form sends no row identifier; script.php
							// presumably deletes by requester hash — confirm.
							$button = "<form action=\"script.php\" method=\"get\"><input type=\"submit\" value=\"DELETE\"></form>";
						} else {
							$button =" (It's not you)";
						}
						// The first six hex digits of the digest double as the text colour.
						$color = htmlspecialchars(substr((string) $value, 0, 6), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
						// NOTE(review): id="button" repeats on every row (duplicate ids);
						// left unchanged because the stylesheet may target it.
						print " <td id = \"button\" style=\"color:#" . $color . ";\">$safeValue" . "$button             </td> ";
					} else {
						print " <td>$safeValue</td>";
					}
				}
				print " </tr>";
			}
			print "</table>";
		} catch(PDOException $e) {
			// Deliberately generic: never expose connection details to visitors.
			echo 'ERROR' ;
		}



?>




<?php
/*
        insert.php
        User inputted values from the form are checked against blacklist and uploaded to database only if:
        1. captcha is complete
        2. neither word contains substring of a blacklisted word
        3. word length does not exceed maximum length (to keep table clean)
        4. words do not contain special chars that would overflow the cell boundaries

        Znamensky | 2021
*/



    // Start without a database handle; the PDO connection is assigned further down.
    $conn = null;

    /**
     * Returns the MD5 digest used to tag a submission with its sender.
     *
     * The digest input is ip2long() of a valid IPv4 address, inet_pton() of a
     * valid IPv6 address, or the literal "1.1.1.1" when REMOTE_ADDR is neither.
     *
     * @return string 32-character hexadecimal MD5 digest
     */
    function generateHash(){
        $address = $_SERVER['REMOTE_ADDR'];

        if (filter_var($address, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
            return hash('md5', ip2long($address));
        }

        if (filter_var($address, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return hash('md5', inet_pton($address));
        }

        // Neither address family matched: hash a fixed placeholder instead.
        return hash('md5', "1.1.1.1");
    }


    //simple popup function for debugging
    /**
     * Emits an inline script that shows $message in a JavaScript alert box.
     *
     * The message is written as a JSON string literal with <, >, &, ' and "
     * hex-escaped, so an apostrophe or "</script>" in the message cannot
     * terminate the string or the script element.
     *
     * @param string $message text to display
     * @return void
     */
    function function_alert($message) {
        $flags = JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP;
        $literal = json_encode((string) $message, $flags);

        // json_encode() returns false for input it cannot encode (e.g. invalid
        // UTF-8); show an empty alert instead of emitting broken JavaScript.
        if ($literal === false) {
            $literal = '""';
        }

        echo "<script>alert(" . $literal . ");</script>";
    }


    /**
     * Tells whether $Name contains any dictionary value as a case-insensitive
     * substring.
     *
     * Every column of every row is treated as a candidate word.
     * NOTE(review): with "SELECT *" a numeric id column would therefore match
     * digits in the submission — confirm the badwords table has one column.
     *
     * @param iterable $data rows (arrays) of dictionary values
     * @param string   $Name text to inspect
     * @return int 1 when a value occurs in $Name, otherwise 0
     */
    function containsBadWord($data,$Name){
        $haystack = (string) $Name;

        foreach ($data as $row){
            foreach ($row as $value){
                // An empty needle is not a word: on PHP 8 stripos() reports it as a
                // match at offset 0 (blocking every submission), on PHP 7 it warns.
                if ($value === null || $value === ''){
                    continue;
                }

                if (stripos($haystack, (string) $value) !== false){
                    return 1;
                }
            }
        }

        // no bad words
        return 0;
    }

    /**
     * Returns the smallest Levenshtein distance between any submitted word and
     * any dictionary value.
     *
     * @param array    $Name_nopunctuation_list submitted words
     * @param iterable $data rows (arrays) of dictionary values; may be a one-shot
     *                       iterator such as a PDOStatement
     * @return int minimum distance found, or 100000 when nothing was compared
     */
    function leven($Name_nopunctuation_list,$data){
        // Materialise the rows once. A PDOStatement can only be traversed a single
        // time, so re-iterating it per word would score only the first word.
        $rows = is_array($data) ? $data : iterator_to_array($data, false);

        $levencopy = 100000;

        foreach ($Name_nopunctuation_list as $word){
            // An empty token (e.g. from consecutive spaces) is not a word; its
            // distance would just be the length of the shortest dictionary value.
            if ($word === '' || $word === null){
                continue;
            }

            foreach ($rows as $row){
                foreach ($row as $value){
                    if ($value === null || $value === ''){
                        continue;
                    }

                    $distance = levenshtein((string) $word, (string) $value);

                    if ($distance < $levencopy){
                        $levencopy = $distance;
                    }
                }
            }
        }

        return $levencopy;
    }

    /**
     * Returns the largest similar_text() score between any dictionary value and
     * any submitted word.
     *
     * @param array    $Name_nopunctuation_list submitted words
     * @param iterable $data rows (arrays) of dictionary values; may be a one-shot
     *                       iterator such as a PDOStatement
     * @return int highest number of matching characters found, 0 when none
     */
    function similar($Name_nopunctuation_list,$data){
        // Materialise the rows once. A PDOStatement can only be traversed a single
        // time, so re-iterating it per word would score only the first word.
        $rows = is_array($data) ? $data : iterator_to_array($data, false);

        $similarcopy = 0;

        foreach ($Name_nopunctuation_list as $word){
            foreach ($rows as $row){
                foreach ($row as $value){
                    // Argument order is kept as (dictionary value, word):
                    // similar_text() is not symmetric.
                    $score = similar_text((string) $value, (string) $word);

                    if ($score > $similarcopy){
                        $similarcopy = $score;
                    }
                }
            }
        }

        return $similarcopy;
    }



    // checks if spelling is clean
    // returns 1 for yes
    // returns 0 for no
    /**
     * Looks a single word up in the clean-word rows, ignoring letter case.
     *
     * @param string   $Name_nopunctuation_list_element word to look up
     * @param iterable $data_clean rows (arrays) of clean dictionary values
     * @return int 1 when the word equals some value, otherwise 0
     */
    function isSpellingClean($Name_nopunctuation_list_element, $data_clean){
        foreach ($data_clean as $cleanRow){
            foreach ($cleanRow as $cleanWord){
                // strcasecmp() yields 0 only when both strings are equal apart from case
                if (strcasecmp($Name_nopunctuation_list_element, $cleanWord) === 0){
                    return 1;
                }
            }
        }

        // the word was not found in any row
        return 0;
    }

    $hash = generateHash();

    // Longest accepted submission in characters. The check and the message shown
    // to the user are both derived from this value so they cannot drift apart.
    $maxSubmissionLength = 100;

    // Updates the effectiveness counters for one processed submission:
    // id 1 counts every submission, id 0 additionally counts blocked ones.
    function recordSubmission(PDO $conn, $blocked){
        if ($blocked){
            $conn->exec('UPDATE effectiveness SET effectiveness = effectiveness + 1 WHERE id = 0');
        }
        $conn->exec('UPDATE effectiveness SET effectiveness = effectiveness + 1 WHERE id = 1');
    }

    // Stores an accepted submission; all values are bound, never interpolated into SQL.
    function storeSubmission(PDO $conn, $submission, $spellcheck, $minLevenshtein, $maxSimilarText, $md5){
        $statement = $conn->prepare('INSERT INTO food (submission, spellcheck, min_levenshtein, max_similar_text, md5) VALUES (?, ?, ?, ?, ?)');
        $statement->execute([$submission, $spellcheck, $minLevenshtein, $maxSimilarText, $md5]);
    }

    // Ends the request by sending the visitor back to index.php. The header is sent
    // BEFORE the optional alert, because header() fails once output has started.
    function returnToIndex($message = null){
        header("refresh:1; url=index.php");
        if ($message !== null){
            function_alert($message);
        }
        exit;
    }

    // NOTE(review): the file header lists a completed captcha as a precondition, but
    // nothing in this script verifies the g-recaptcha-response field — TODO add it.

    // CHAR LIMIT AND NON-UTF8 FILTER ___________________________________________________________
    $rawSubmission = (isset($_POST['submission']) && is_string($_POST['submission'])) ? $_POST['submission'] : '';

    // Strips tags, encodes quotes and drops every byte above 0x7F, leaving pure ASCII.
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1; replace it
    // together with the output escaping in index.php when upgrading.
    $Name = (string) filter_var($rawSubmission, FILTER_SANITIZE_STRING, FILTER_FLAG_STRIP_HIGH);

    // The string is ASCII-only at this point, so the byte length is the character length.
    if (strlen($Name) > $maxSubmissionLength){
        returnToIndex("Unaccepted characters or message is too long, limit is $maxSubmissionLength characters.");
    }

    //removes punctuation
    $Name_nopunctuation = preg_replace('/\p{P}/', '', $Name);

    // Splits into words on whitespace; empty tokens from repeated spaces are dropped
    // so they cannot fail the spell check or distort the similarity scores.
    $Name_nopunctuation_list = preg_split('/\s+/', (string) $Name_nopunctuation, -1, PREG_SPLIT_NO_EMPTY);

    if (count($Name_nopunctuation_list) === 0){
        returnToIndex("Empty submission, exiting");
    }

    // SQL  ___________________________________________________________
    $servername_b4z = "";
    $username_b4z = "";
    $password_b4z = "";
    $dbname_b4z = "";

    try {
        $conn = new PDO("mysql:host=$servername_b4z;dbname=$dbname_b4z", $username_b4z, $password_b4z);
        // Fail loudly instead of continuing with a false result after an SQL error.
        $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // The clean dictionary is loaded once and reused for every word.
        $data_clean = $conn->query("SELECT * FROM cleanlist")->fetchAll(PDO::FETCH_ASSOC);

        // $score_count stays 1 only if every word is in the clean dictionary.
        $score_count = 1;
        foreach ($Name_nopunctuation_list as $Name_nopunctuation_list_element){
            if (isSpellingClean($Name_nopunctuation_list_element, $data_clean) !== 1){
                $score_count = 0;
                break;
            }
        }
        unset($data_clean);

        // Every word is a known clean word: accept without similarity scoring.
        if ($score_count !== 0){
            storeSubmission($conn, $Name, $score_count, 'N/A', 'N/A', $hash);
            recordSubmission($conn, false);
            $conn = null;
            returnToIndex();
        }

        // The bad-word list is loaded once into an array so every check below (and
        // every word inside leven()/similar()) sees the complete list.
        $data = $conn->query("SELECT * FROM badwords")->fetchAll(PDO::FETCH_ASSOC);

        // Direct hit, either in the text as typed or with punctuation removed.
        // Both paths now count as a blocked submission.
        if (containsBadWord($data, $Name) == 1 || containsBadWord($data, $Name_nopunctuation) == 1){
            recordSubmission($conn, true);
            $conn = null;
            returnToIndex("Prohibited word, exiting");
        }

        $leven = leven($Name_nopunctuation_list, $data);
        $similar = similar($Name_nopunctuation_list, $data);

        // Risk score: many shared characters at a small edit distance means "close
        // to a bad word". A distance of 0 is an exact match and is blocked outright,
        // which also avoids dividing by zero.
        $isHighRisk = ($leven == 0) || (($similar / $leven) > 4);

        //block these
        if ($isHighRisk){
            recordSubmission($conn, true);
            $conn = null;
            returnToIndex("High probability of prohibited word(s), exiting");
        }

        storeSubmission($conn, $Name, $score_count, $leven, $similar, $hash);
        recordSubmission($conn, false);
        $conn = null;
        returnToIndex();

    } catch (PDOException $e){
        // Deliberately generic: never expose connection details to visitors.
        $conn = null;
        header("refresh:1; url=index.php");
        echo 'ERROR';
        exit;
    }



?>