diff --git a/README.md b/README.md index c000d85..dc99364 100644 --- a/README.md +++ b/README.md @@ -40,3 +40,7 @@ echo $text; // consist ## Tests A verification list of 29,000 words and their expected stems can be run (after ```composer install``` via ```phpunit```). + +## Contributions (Paras Lehana) + +* *External file support for protected words:* Now you can add protected words to this stemmer. Protected words won't be stemmed. For example, I have added 'training' as a protected word so that it doesn't get stemmed to 'train'. Add each word on its own line in the file src/protwords.txt (more instructions are in the protwords.txt file). diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..2f7efbe --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-minimal \ No newline at end of file diff --git a/src/Porter2.php b/src/Porter2.php index 964a8aa..e8c46bc 100644 --- a/src/Porter2.php +++ b/src/Porter2.php @@ -161,6 +161,14 @@ protected static function step1b($word) { 'exceed', 'succeed', ); + + $line_arr = file(__DIR__.'/protwords.txt',FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES); + + foreach ($line_arr as $line){ + if(substr(trim($line),0,1)=='#') continue; + array_push($exceptions,(string)$line); + } + if (in_array($word, $exceptions)) { return $word; } diff --git a/src/protwords.txt b/src/protwords.txt new file mode 100644 index 0000000..275e4ed --- /dev/null +++ b/src/protwords.txt @@ -0,0 +1,10 @@ +# Please add protected words here, one per line. +# Lines starting with a hash (#) are ignored, so you can use them for comments. +# I usually add the current date before adding a protected word. +# Protected words: The words you put here are ignored by the stemmer. That is, a keyword like 'training' put here is returned as 'training' by the stemmer and not 'train'. +# Blank lines are also ignored. +# Demo: 'training', which I added on Jan 10, 2019, is protected below. Likewise, you can add your own words, one per line.
+ +# Added on Jan 10, 2019 + +training