Fixed an issue with the freetext import

- url detection would detect any word with a trailing "." as a valid url
  - google. was detected as a url
- this also caused trailing "."s to be included in valid urls
  - http://www.google.com.
pull/897/head
Iglocska 2016-01-23 20:19:44 +01:00
parent c4cf4eca9c
commit c2c41b04d3
1 changed files with 1 additions and 1 deletions

View File

@@ -104,6 +104,7 @@ class ComplexTypeTool {
$inputRefanged = preg_replace('/^hxxp/i', 'http', $input);
$inputRefanged = preg_replace('/\[\.\]/', '.' , $inputRefanged);
$inputRefanged = rtrim($inputRefanged, ".");
// note down and remove the port if it's a url / domain name / hostname / ip
// input2 from here on is the variable containing the original input with the port removed. It is only used by url / domain name / hostname / ip
$comment = false;
@@ -124,7 +125,6 @@ class ComplexTypeTool {
// check for domain name, hostname, filename
if (strpos($inputRefanged, '.') !== false) {
$temp = explode('.', $inputRefanged);
//if (filter_var($input, FILTER_VALIDATE_URL)) {
if (preg_match('/^([-\pL\pN]+\.)+([a-z][a-z]|biz|cat|com|edu|gov|int|mil|net|org|pro|tel|aero|arpa|asia|coop|info|jobs|mobi|name|museum|travel)(:[0-9]{2,5})?$/iu', $inputRefanged)) {
if (count($temp) > 2) {