chg: [command] Extracted function
parent
407c63edff
commit
9328bb1b3f
|
@ -188,31 +188,7 @@ class FieldSquasherCommand extends Command
|
||||||
$squashedJoinField = $config['finder']['joinFields']['squashed'];
|
$squashedJoinField = $config['finder']['joinFields']['squashed'];
|
||||||
$closestMatchResults = [];
|
$closestMatchResults = [];
|
||||||
|
|
||||||
// Compute proximity score
|
$squashingObjects = $this->getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField);
|
||||||
foreach ($squashingObjects as $i => $squashingObject) {
|
|
||||||
$squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
|
|
||||||
foreach ($allCanditates as $candidate) {
|
|
||||||
$squashedJoinValue = Hash::get($candidate, $squashedJoinField);
|
|
||||||
$proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
|
|
||||||
$closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
|
|
||||||
$squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// sort by score
|
|
||||||
foreach ($squashingObjects as $i => $squashingObject) {
|
|
||||||
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
|
|
||||||
}
|
|
||||||
foreach ($closestMatchResults as $i => $proximityScore) {
|
|
||||||
ksort($closestMatchResults[$i], SORT_NUMERIC);
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove best occurrence in other matching sets
|
|
||||||
foreach ($allCanditates as $candidate) {
|
|
||||||
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
|
|
||||||
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
|
|
||||||
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
|
|
||||||
}
|
|
||||||
|
|
||||||
// pick the best match
|
// pick the best match
|
||||||
foreach ($squashingObjects as $i => $squashingObject) {
|
foreach ($squashingObjects as $i => $squashingObject) {
|
||||||
|
@ -248,7 +224,7 @@ class FieldSquasherCommand extends Command
|
||||||
$squashedTarget["{$config['target']['squashedField']}_original_value"] = $squashedTarget[$config['target']['squashedField']];
|
$squashedTarget["{$config['target']['squashedField']}_original_value"] = $squashedTarget[$config['target']['squashedField']];
|
||||||
$squashedTarget['match_score'] = $bestScore;
|
$squashedTarget['match_score'] = $bestScore;
|
||||||
$squashedTarget['based_on_best_match_joinFields'] = Hash::get($squashingObject, $squashingJoinField);
|
$squashedTarget['based_on_best_match_joinFields'] = Hash::get($squashingObject, $squashingJoinField);
|
||||||
$squashedTarget['based_on_best_match'] = json_encode($squashingObject);
|
// $squashedTarget['based_on_best_match'] = json_encode($squashingObject);
|
||||||
$squashedTarget[$config['target']['squashedField']] = $squashingData;
|
$squashedTarget[$config['target']['squashedField']] = $squashingData;
|
||||||
if ($bestScore > 0) {
|
if ($bestScore > 0) {
|
||||||
$notExactCandidates[] = $squashedTarget;
|
$notExactCandidates[] = $squashedTarget;
|
||||||
|
@ -290,24 +266,34 @@ class FieldSquasherCommand extends Command
|
||||||
return $squashingObjects;
|
return $squashingObjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function removeMatchFromOtherMatches(&$closestMatchResults, $match, $currentIndex)
|
private function getBestOccurenceSet($squashingObjects, $allCanditates, $squashingJoinField, $squashedJoinField)
|
||||||
{
|
{
|
||||||
// remove squashingObject from all other matches
|
// Compute proximity score
|
||||||
|
foreach ($squashingObjects as $i => $squashingObject) {
|
||||||
|
$squashingJoinValue = Hash::get($squashingObject, $squashingJoinField);
|
||||||
|
foreach ($allCanditates as $candidate) {
|
||||||
|
$squashedJoinValue = Hash::get($candidate, $squashedJoinField);
|
||||||
|
$proximityScore = $this->getProximityScore($squashingJoinValue, $squashedJoinValue);
|
||||||
|
$closestMatchResults[$candidate['id']][$proximityScore][] = $squashingObject;
|
||||||
|
$squashingObjects[$i]['__scores'][$proximityScore][] = $candidate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort by score
|
||||||
|
foreach ($squashingObjects as $i => $squashingObject) {
|
||||||
|
ksort($squashingObjects[$i]['__scores'], SORT_NUMERIC);
|
||||||
|
}
|
||||||
foreach ($closestMatchResults as $i => $proximityScore) {
|
foreach ($closestMatchResults as $i => $proximityScore) {
|
||||||
if ($i == $currentIndex) {
|
ksort($closestMatchResults[$i], SORT_NUMERIC);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
foreach ($proximityScore as $score => $squashingObjects) {
|
|
||||||
foreach ($squashingObjects as $j => $squashingObject) {
|
|
||||||
if ($squashingObject == $match) {
|
|
||||||
unset($closestMatchResults[$i][$score][$j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (empty($closestMatchResults[$i][$score])) {
|
|
||||||
unset($closestMatchResults[$i][$score]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remove best occurrence in other matching sets
|
||||||
|
foreach ($allCanditates as $candidate) {
|
||||||
|
$bestScore = array_key_first($closestMatchResults[$candidate['id']]);
|
||||||
|
$bestMatch = $closestMatchResults[$candidate['id']][$bestScore][0];
|
||||||
|
$squashingObjects = $this->removeCandidatesFromSquashingSet($squashingObjects, $bestMatch, $candidate['id']);
|
||||||
}
|
}
|
||||||
|
return $squashingObjects;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function getProximityScore($value1, $value2)
|
private function getProximityScore($value1, $value2)
|
||||||
|
|
|
@ -6,7 +6,8 @@
|
||||||
"squashing": "name"
|
"squashing": "name"
|
||||||
},
|
},
|
||||||
"path": "{n}.Organisation",
|
"path": "{n}.Organisation",
|
||||||
"type": "exact"
|
"type": "closest",
|
||||||
|
"levenshteinScore": 1
|
||||||
},
|
},
|
||||||
"target": {
|
"target": {
|
||||||
"model": "Organisations",
|
"model": "Organisations",
|
||||||
|
|
Loading…
Reference in New Issue