chg: [command] Extracted function

pull/40/head
mokaddem 2021-03-01 11:39:18 +01:00
parent 407c63edff
commit 9328bb1b3f
2 changed files with 29 additions and 42 deletions

View File

@ -188,31 +188,7 @@ class FieldSquasherCommand extends Command
$squashedJoinField = $config['finder']['joinFields']['squashed']; $squashedJoinField = $config['finder']['joinFields']['squashed'];
$closestMatchResults = []; $closestMatchResults = [];
// Compute proximity score $squashingObjects = $this->getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField);
foreach ($squashingObjects as $i => $squashingObject) {
$squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
foreach ($allCanditates as $candidate) {
$squashedJoinValue = Hash::get($candidate, $squashedJoinField);
$proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
$closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
$squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
}
}
// sort by score
foreach ($squashingObjects as $i => $squashingObject) {
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
}
foreach ($closestMatchResults as $i => $proximityScore) {
ksort($closestMatchResults[$i], SORT_NUMERIC);
}
// remove best occurence in other matching sets
foreach ($allCanditates as $candidate) {
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
}
// pick the best match // pick the best match
foreach ($squashingObjects as $i => $squashingObject) { foreach ($squashingObjects as $i => $squashingObject) {
@ -248,7 +224,7 @@ class FieldSquasherCommand extends Command
$squashedTarget["{$config['target']['squashedField']}_original_value"] = $squashedTarget[$config['target']['squashedField']]; $squashedTarget["{$config['target']['squashedField']}_original_value"] = $squashedTarget[$config['target']['squashedField']];
$squashedTarget['match_score'] = $bestScore; $squashedTarget['match_score'] = $bestScore;
$squashedTarget['based_on_best_match_joinFields'] = Hash::get($squashingObject, $squashingJoinField); $squashedTarget['based_on_best_match_joinFields'] = Hash::get($squashingObject, $squashingJoinField);
$squashedTarget['based_on_best_match'] = json_encode($squashingObject); // $squashedTarget['based_on_best_match'] = json_encode($squashingObject);
$squashedTarget[$config['target']['squashedField']] = $squashingData; $squashedTarget[$config['target']['squashedField']] = $squashingData;
if ($bestScore > 0) { if ($bestScore > 0) {
$notExactCandidates[] = $squashedTarget; $notExactCandidates[] = $squashedTarget;
@ -290,24 +266,34 @@ class FieldSquasherCommand extends Command
return $squashingObjects; return $squashingObjects;
} }
// Side-by-side diff rows: the left column is the removed removeMatchFromOtherMatches(),
// the right column is the added getBestOccurenceSet(). The notes below describe the
// added function only.
//
// getBestOccurenceSet() scores every squashing object against every candidate by
// passing their join-field values (read with Hash::get) to getProximityScore().
// Each pairing is recorded twice: under $closestMatchResults[candidate id][score]
// and under the squashing object's own '__scores'[score]. Both maps are then
// key-sorted numerically. For every candidate, the first object stored under the
// first (lowest) score key is handed to removeCandidatesFromSquashingSet(), and the
// resulting $squashingObjects array is returned.
//
// NOTE(review): $closestMatchResults is not initialised inside this function; it only
// comes into existence through the first append in the scoring loop. If
// $squashingObjects is empty while $allCanditates is not, the per-candidate lookup
// further down reads a key that was never set. If $allCanditates is empty, '__scores'
// is never set and ksort() is handed an undefined index. Confirm callers never pass
// empty sets, or add guards.
private function removeMatchFromOtherMatches(&$closestMatchResults, $match, $currentIndex) private function getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField)
{ {
// remove squashingObject from all other matches // Compute proximity score
foreach ($closestMatchResults as $i => $proximityScore) { foreach ($squashingObjects as $i => $squashingObject) {
if ($i == $currentIndex) { $squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
continue; foreach ($allCanditates as $candidate) {
} $squashedJoinValue = Hash::get($candidate, $squashedJoinField);
foreach ($proximityScore as $score => $squashingObjects) { $proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
foreach ($squashingObjects as $j => $squashingObject) { $closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
if ($squashingObject == $match) { $squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
unset($closestMatchResults[$i][$score][$j]);
}
}
if (empty($closestMatchResults[$i][$score])) {
unset($closestMatchResults[$i][$score]);
}
} }
} }
// sort by score
// (added function) ksort() orders keys ascending, so the lowest score ends up as the first key.
foreach ($squashingObjects as $i => $squashingObject) {
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
}
foreach ($closestMatchResults as $i => $proximityScore) {
ksort($closestMatchResults[$i], SORT_NUMERIC);
}
// remove best occurence in other matching sets
// (added function) The first key after the ascending sort is treated as the best score, and
// only the first object recorded under it is used; other objects tied at that score are ignored here.
// NOTE(review): removeCandidatesFromSquashingSet() is only partly visible in this diff;
// presumably it strips this candidate from the other objects' '__scores' - confirm.
foreach ($allCanditates as $candidate) {
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
}
return $squashingObjects;
} }
private function getProximityScore($value1, $value2) private function getProximityScore($value1, $value2)

View File

@ -6,7 +6,8 @@
"squashing": "name" "squashing": "name"
}, },
"path": "{n}.Organisation", "path": "{n}.Organisation",
"type": "exact" "type": "closest",
"levenshteinScore": 1
}, },
"target": { "target": {
"model": "Organisations", "model": "Organisations",