chg: [command] Extracted function

pull/40/head
mokaddem 2021-03-01 11:39:18 +01:00
parent 407c63edff
commit 9328bb1b3f
2 changed files with 29 additions and 42 deletions

View File

@ -188,31 +188,7 @@ class FieldSquasherCommand extends Command
$squashedJoinField = $config['finder']['joinFields']['squashed'];
$closestMatchResults = [];
// Compute proximity score
foreach ($squashingObjects as $i => $squashingObject) {
$squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
foreach ($allCanditates as $candidate) {
$squashedJoinValue = Hash::get($candidate, $squashedJoinField);
$proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
$closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
$squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
}
}
// sort by score
foreach ($squashingObjects as $i => $squashingObject) {
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
}
foreach ($closestMatchResults as $i => $proximityScore) {
ksort($closestMatchResults[$i], SORT_NUMERIC);
}
// remove best occurence in other matching sets
foreach ($allCanditates as $candidate) {
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
}
$squashingObjects = $this->getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField);
// pick the best match
foreach ($squashingObjects as $i => $squashingObject) {
@ -248,7 +224,7 @@ class FieldSquasherCommand extends Command
$squashedTarget["{$config['target']['squashedField']}_original_value"] = $squashedTarget[$config['target']['squashedField']];
$squashedTarget['match_score'] = $bestScore;
$squashedTarget['based_on_best_match_joinFields'] = Hash::get($squashingObject, $squashingJoinField);
$squashedTarget['based_on_best_match'] = json_encode($squashingObject);
// $squashedTarget['based_on_best_match'] = json_encode($squashingObject);
$squashedTarget[$config['target']['squashedField']] = $squashingData;
if ($bestScore > 0) {
$notExactCandidates[] = $squashedTarget;
@ -290,24 +266,34 @@ class FieldSquasherCommand extends Command
return $squashingObjects;
}
private function removeMatchFromOtherMatches(&$closestMatchResults, $match, $currentIndex)
private function getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField)
{
// remove squashingObject from all other matches
// Compute proximity score
foreach ($squashingObjects as $i => $squashingObject) {
$squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
foreach ($allCanditates as $candidate) {
$squashedJoinValue = Hash::get($candidate, $squashedJoinField);
$proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
$closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
$squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
}
}
// sort by score
foreach ($squashingObjects as $i => $squashingObject) {
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
}
foreach ($closestMatchResults as $i => $proximityScore) {
if ($i == $currentIndex) {
continue;
}
foreach ($proximityScore as $score => $squashingObjects) {
foreach ($squashingObjects as $j => $squashingObject) {
if ($squashingObject == $match) {
unset($closestMatchResults[$i][$score][$j]);
}
}
if (empty($closestMatchResults[$i][$score])) {
unset($closestMatchResults[$i][$score]);
}
ksort($closestMatchResults[$i], SORT_NUMERIC);
}
// remove best occurence in other matching sets
foreach ($allCanditates as $candidate) {
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
}
return $squashingObjects;
}
private function getProximityScore($value1, $value2)

View File

@ -6,7 +6,8 @@
"squashing": "name"
},
"path": "{n}.Organisation",
"type": "exact"
"type": "closest",
"levenshteinScore": 1
},
"target": {
"model": "Organisations",