new: [command] First version of FieldSquasher - WiP
parent
76617565fa
commit
f8e9632f78
|
@ -0,0 +1,357 @@
|
|||
<?php
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use Cake\Console\Command;
|
||||
use Cake\Console\Arguments;
|
||||
use Cake\Console\ConsoleIo;
|
||||
use Cake\Console\ConsoleOptionParser;
|
||||
use Cake\Filesystem\File;
|
||||
use Cake\Utility\Hash;
|
||||
use Cake\Utility\Text;
|
||||
use Cake\Validation\Validator;
|
||||
use Cake\Http\Client;
|
||||
|
||||
class FieldSquasherCommand extends Command
|
||||
{
|
||||
/**
 * Table alias handed to loadModel(); execute() overwrites it with the
 * `target.model` value from the configuration file.
 *
 * @var string
 */
protected $modelClass = 'Organisations';

/**
 * NOTE(review): not read by any method visible in this class — confirm whether it is still needed.
 *
 * @var string
 */
private $targetModel = 'Organisations';

/**
 * Console IO handle stored by execute() so the private helpers can report
 * progress and errors. Declared explicitly because assigning an undeclared
 * (dynamic) property is deprecated as of PHP 8.2.
 *
 * @var \Cake\Console\ConsoleIo
 */
private $io;
|
||||
|
||||
/**
 * Declares the command description and its single required `config` argument.
 *
 * @param \Cake\Console\ConsoleOptionParser $parser Parser to configure.
 * @return \Cake\Console\ConsoleOptionParser The configured parser.
 */
protected function buildOptionParser(ConsoleOptionParser $parser): ConsoleOptionParser
{
    $configArgument = [
        'help' => 'JSON configuration file path for the importer.',
        'required' => true
    ];

    // Both setters are fluent, so the chain yields the parser that was passed in.
    return $parser
        ->setDescription('Squash field value from external data source')
        ->addArgument('config', $configArgument);
}
|
||||
|
||||
/**
 * Runs the squashing preview.
 *
 * Reads and validates the JSON configuration, loads the target table and the
 * source data, matches source objects against existing records, prints a
 * sample of unmatched and matched entries and optionally dumps both lists to
 * JSON files in the working directory.
 *
 * @param \Cake\Console\Arguments $args Parsed arguments; `config` holds the configuration file path.
 * @param \Cake\Console\ConsoleIo $io Console IO used for all output and prompts.
 * @return void This work-in-progress version always terminates the process with status 1.
 */
public function execute(Arguments $args, ConsoleIo $io)
{
    // Stored on the instance because every private helper reports through $this->io.
    $this->io = $io;
    $configPath = $args->getArgument('config');
    $config = $this->getConfigFromFile($configPath);
    $this->processConfig($config);
    $this->modelClass = $config['target']['model'];
    $source = $config['source'];

    $table = $this->modelClass;
    $this->loadModel($table);
    $sourceData = $this->getDataFromSource($source);
    $candidateResult = $this->findCanditates($this->{$table}, $config, $sourceData);

    // The finder result may lack these keys (e.g. an empty array when nothing
    // could be extracted from the source), so default both lists.
    $candidates = $candidateResult['candidates'] ?? [];
    $noCandidatesFound = $candidateResult['noCandidatesFound'] ?? [];

    // array_slice() already clamps the length to the size of the array.
    $entitiesSample = array_slice($candidates, 0, 10);
    $noCandidatesSample = array_slice($noCandidatesFound, 0, 10);

    $totalNotFound = count($noCandidatesFound);
    $this->io->out("Sample of no candidates found (total: {$totalNotFound}):");
    $ioTable = $this->transformEntitiesIntoTable($noCandidatesSample);
    $io->helper('Table')->output($ioTable);
    $filename = 'no_candidates_found_' . time() . '.json';
    $selection = $io->askChoice("Would you like to save these entries on the disk as `{$filename}`", ['Y', 'N'], 'Y');
    // askChoice() also accepts the lower-case variants of the options and
    // returns the answer as typed, hence the case normalisation.
    if (strtoupper($selection) === 'Y') {
        $this->saveDataOnDisk($filename, $noCandidatesFound);
    }

    $this->io->out('');
    $ioTable = $this->transformEntitiesIntoTable($entitiesSample, [
        'id',
        $config['finder']['joinFields']['squashed'],
        $config['target']['squashedField'],
        "{$config['target']['squashedField']}_original_value",
    ]);
    $io->helper('Table')->output($ioTable);
    $filename = 'replacement_done_' . time() . '.json';
    $selection = $io->askChoice("Would you like to save these entries on the disk as `{$filename}`", ['Y', 'N'], 'Y');
    if (strtoupper($selection) === 'Y') {
        $this->saveDataOnDisk($filename, $candidates);
    }

    // WiP: the command deliberately stops here, before anything is persisted.
    // TODO: ask for a final confirmation and pass the candidates to saveData().
    die(1);
}
|
||||
|
||||
/**
 * Persists the given entities through the table, then saves the meta fields
 * of each saved entity while drawing a progress bar.
 *
 * NOTE(review): saveMetaFields() is not defined in this class as shown —
 * confirm it exists before this method is wired in.
 *
 * @param object $table Table object exposing saveMany().
 * @param iterable $entities Entities to save.
 * @return void
 */
private function saveData($table, $entities)
{
    $this->loadModel('MetaFields');
    $this->io->verbose('Saving data');
    $progress = $this->io->helper('Progress');

    $entities = $table->saveMany($entities);
    if ($entities === false) {
        $this->io->error('Error while saving data');

        // Nothing was saved: counting or iterating `false` below would fail.
        return;
    }

    $this->io->verbose('Saving meta fields');
    $this->io->out('');
    $progress->init([
        'total' => count($entities),
        'length' => 20
    ]);
    foreach ($entities as $entity) {
        $this->saveMetaFields($entity);
        $progress->increment(1);
        $progress->draw();
    }
    $this->io->out('');
}
|
||||
|
||||
/**
 * Dispatches to the matching strategy selected by `finder.type`.
 *
 * Only the `exact` strategy is implemented; any other value prints an error
 * and terminates the process with status 1.
 *
 * @param object $table Table to search in.
 * @param array $config Parsed configuration.
 * @param array $source Decoded source data.
 * @return array Result of the strict matching strategy.
 */
private function findCanditates($table, $config, $source)
{
    $this->io->verbose('Finding candidates');

    $finderType = $config['finder']['type'];
    if ($finderType != 'exact') {
        $this->io->error('Unsupported search type');
        die(1);
    }

    return $this->findCanditatesByStrictMatching($table, $config, $source);
}
|
||||
|
||||
/**
 * Matches source objects to existing records by exact equality of the join field.
 *
 * Source objects are extracted with `finder.path`. Records whose
 * `finder.joinFields.squashed` column equals the object's
 * `finder.joinFields.squashing` value are candidates: their
 * `target.squashedField` is overwritten with the (optionally massaged) source
 * value, and the previous value is kept on `<squashedField>_original_value`.
 * Nothing is saved here.
 *
 * @param object $table Table to query.
 * @param array $config Parsed configuration.
 * @param array $source Decoded source data.
 * @return array{candidates: array, noCandidatesFound: array} Modified records and unmatched source objects.
 */
private function findCanditatesByStrictMatching($table, $config, $source)
{
    $joinFields = $config['finder']['joinFields'];
    $squashedField = $config['target']['squashedField'];
    $originalValueField = "{$squashedField}_original_value";

    $squashingObjects = Hash::extract($source, $config['finder']['path']);
    if (empty($squashingObjects)) {
        $this->io->error('finder.path returned nothing');

        // Keep the return shape stable so callers can index both keys.
        return [
            'candidates' => [],
            'noCandidatesFound' => []
        ];
    }

    $values = Hash::extract($squashingObjects, "{n}.{$joinFields['squashing']}");
    $potentialCanditates = [];
    // An empty value list cannot be turned into an `IN` condition; skip the query.
    if (!empty($values)) {
        $query = $table->find('list', [
            'keyField' => $joinFields['squashed'],
            'valueField' => function ($entry) {
                return $entry;
            }
        ])->where([
            "{$joinFields['squashed']} IN" => $values
        ]);
        $potentialCanditates = $query->toArray();
    }

    // The massage hook names a method of this class. It is looked up with
    // method_exists(): isset() on `$this->{name}` would test a property and
    // therefore never match. Unknown names are skipped.
    $massage = $config['squashingData']['massage'] ?? null;
    $applyMassage = is_string($massage) && method_exists($this, $massage);

    $candidates = [];
    $noCandidatesFound = [];
    foreach ($squashingObjects as $squashingObject) {
        $squashingData = Hash::get($squashingObject, $config['squashingData']['squashingField']);
        if ($applyMassage) {
            $squashingData = $this->{$massage}($squashingData);
        }

        $squashingJoinField = Hash::get($squashingObject, $joinFields['squashing']);
        if (empty($potentialCanditates[$squashingJoinField])) {
            $noCandidatesFound[] = $squashingObject;
            continue;
        }

        $squashedTarget = $potentialCanditates[$squashingJoinField];
        $squashedTarget->{$originalValueField} = $squashedTarget->{$squashedField};
        $squashedTarget->{$squashedField} = $squashingData;
        $candidates[] = $squashedTarget;
    }

    return [
        'candidates' => $candidates,
        'noCandidatesFound' => $noCandidatesFound
    ];
}
|
||||
|
||||
/**
 * Builds per-record rows from the source according to `$config['mapping']`.
 *
 * Each mapping entry is either a Hash path or an array with `path` and an
 * optional `massage` method name applied to every extracted value. Keys
 * present in `$defaultFields` are stored at the top level, all others under
 * `metaFields`. The column-oriented result is then passed to invertArray().
 *
 * @param array $defaultFields Field names (as keys) that belong to the record itself.
 * @param array $config Configuration containing the `mapping` section.
 * @param array $source Decoded source data.
 * @return array Output of invertArray() for the collected columns.
 */
private function extractDataFromJSON($defaultFields, $config, $source)
{
    $data = [];
    foreach ($config['mapping'] as $key => $fieldConfig) {
        if (!is_array($fieldConfig)) {
            $fieldConfig = ['path' => $fieldConfig];
        }

        // Default to an empty list so array_map() and invertArray() never receive null.
        $values = [];
        if (!empty($fieldConfig['path'])) {
            $values = Hash::extract($source, $fieldConfig['path']);
        }
        if (!empty($fieldConfig['massage'])) {
            // The massage helpers are instance methods, so they must be bound
            // to $this; a "self::name" string would invoke them statically.
            $values = array_map([$this, $fieldConfig['massage']], $values);
        }

        if (isset($defaultFields[$key])) {
            $data[$key] = $values;
        } else {
            $data['metaFields'][$key] = $values;
        }
    }

    return $this->invertArray($data);
}
|
||||
|
||||
/**
 * Loads the source data, treating `$source` first as a local file path and,
 * when the file reader returns false, as a URL.
 *
 * @param string $source File path or URL.
 * @return mixed Decoded data from whichever reader succeeded.
 */
private function getDataFromSource($source)
{
    $fromFile = $this->getDataFromFile($source);

    return $fromFile === false
        ? $this->getDataFromURL($source)
        : $fromFile;
}
|
||||
|
||||
/**
 * Downloads and decodes a JSON document from a URL.
 *
 * The URL is validated first. Validation errors, a non-successful HTTP status
 * or a body that cannot be decoded as JSON are reported and terminate the
 * process with status 1.
 *
 * @param string $url Location of the JSON document.
 * @return mixed Decoded JSON payload.
 */
private function getDataFromURL($url)
{
    $validator = new Validator();
    $validator
        ->requirePresence('url')
        ->notEmptyString('url', 'Please provide a valid source')
        ->url('url');
    $errors = $validator->validate(['url' => $url]);
    if (!empty($errors)) {
        $this->io->error(json_encode(Hash::extract($errors, '{s}'), JSON_PRETTY_PRINT));
        die(1);
    }

    $http = new Client();
    $this->io->verbose('Downloading file');
    $response = $http->get($url);
    // Do not try to interpret an error page as source data.
    if (!$response->isOk()) {
        $this->io->error("Error while downloading the source, HTTP status: {$response->getStatusCode()}");
        die(1);
    }

    $data = $response->getJson();
    // Mirrors the file reader: an undecodable body is a fatal error.
    if (is_null($data)) {
        $this->io->error('Error while parsing the source file');
        die(1);
    }

    return $data;
}
|
||||
|
||||
/**
 * Reads and decodes a JSON file from disk.
 *
 * @param string $path Path of the file to read.
 * @return mixed Decoded content, or false when the file does not exist or
 *               its content is empty. A file that cannot be decoded ends the
 *               process with status 1.
 */
private function getDataFromFile($path)
{
    $file = new File($path);
    if (!$file->exists()) {
        return false;
    }

    $this->io->verbose('Reading file');
    $contents = $file->read();
    $file->close();
    if (empty($contents)) {
        return false;
    }

    $decoded = json_decode($contents, true);
    if ($decoded === null) {
        $this->io->error('Error while parsing the source file');
        die(1);
    }

    return $decoded;
}
|
||||
|
||||
/**
 * Serialises `$data` as JSON and writes it to `$filename`, creating the file
 * if needed. Encoding and write failures are reported instead of announcing
 * a file that was never written.
 *
 * @param string $filename Destination file name.
 * @param mixed $data Data to encode.
 * @return void
 */
private function saveDataOnDisk($filename, $data)
{
    $encoded = json_encode($data);
    if ($encoded === false) {
        $this->io->error('Error while encoding data as JSON: ' . json_last_error_msg());

        return;
    }

    $file = new File($filename, true);
    if ($file->write($encoded)) {
        $this->io->out("File saved at: {$file->pwd()}");
    } else {
        $this->io->error("Error while writing file: {$file->pwd()}");
    }
    $file->close();
}
|
||||
|
||||
/**
 * Reads and decodes the JSON configuration file.
 *
 * A missing, empty/unreadable or undecodable file is reported and terminates
 * the process with status 1, so callers always receive a decoded value.
 *
 * @param string $configPath Path to the JSON configuration file.
 * @return mixed Decoded configuration.
 */
private function getConfigFromFile($configPath)
{
    $file = new File($configPath);
    if (!$file->exists()) {
        $this->io->error('Configuration file not found');
        // Stop here rather than returning null and failing later with a misleading message.
        die(1);
    }

    $config = $file->read();
    $file->close();
    if (empty($config)) {
        $this->io->error('Configuration file could not be read');
        die(1);
    }

    $config = json_decode($config, true);
    if (is_null($config)) {
        $this->io->error('Error while parsing the configuration file');
        die(1);
    }

    return $config;
}
|
||||
|
||||
/**
 * Validates the decoded configuration.
 *
 * Checks the four top-level sections, the target model, the finder settings
 * and the nested keys that the rest of the command dereferences without
 * further checks. The first violation is reported and terminates the process
 * with status 1.
 *
 * @param mixed $config Decoded configuration.
 * @return void
 */
private function processConfig($config)
{
    $allowedModels = ['Organisations', 'Individuals'];
    $allowedFinderType = ['exact', 'bestMatch'];

    // Every failure shares the same prefix and exit path.
    $fail = function ($message) {
        $this->io->error('Error while parsing the configuration file, ' . $message);
        die(1);
    };

    if (empty($config['source']) || empty($config['finder']) || empty($config['target']) || empty($config['squashingData'])) {
        $fail('some of these fields are missing: `source`, `finder`, `target`, `squashingData`');
    }

    if (empty($config['target']['model'])) {
        $fail('target.model configuration is missing');
    }
    // Strict comparison so non-string JSON values (e.g. `true`) cannot pass.
    if (!in_array($config['target']['model'], $allowedModels, true)) {
        $fail('target.model configuration must be one of: ' . implode(', ', $allowedModels));
    }

    if (empty($config['finder']['path']) || empty($config['finder']['joinFields'])) {
        $fail('some finder fields are missing');
    }
    if (empty($config['finder']['type'])) {
        $fail('finder.type configuration is missing');
    }
    if (!in_array($config['finder']['type'], $allowedFinderType, true)) {
        $fail('finder.type configuration must be one of: ' . implode(', ', $allowedFinderType));
    }

    // The keys below are read later when querying and squashing.
    if (empty($config['finder']['joinFields']['squashed']) || empty($config['finder']['joinFields']['squashing'])) {
        $fail('finder.joinFields must define both `squashed` and `squashing`');
    }
    if (empty($config['target']['squashedField'])) {
        $fail('target.squashedField configuration is missing');
    }
    if (empty($config['squashingData']['squashingField'])) {
        $fail('squashingData.squashingField configuration is missing');
    }
}
|
||||
|
||||
/**
 * Converts a list of associative rows into the nested-array layout expected
 * by the console Table helper: header row first, then one row of strings per
 * record.
 *
 * @param array $result Rows to render.
 * @param array $header Optional column keys; defaults to the keys of the first row.
 * @return array Table rows, or `[[]]` when `$result` is empty.
 */
private function transformResultSetsIntoTable($result, $header=[])
{
    if (empty($result)) {
        return [[]];
    }

    $useRowKeys = empty($header);
    $columns = $useRowKeys ? array_keys($result[0]) : $header;

    $rows = [$columns];
    foreach ($result as $record) {
        if ($useRowKeys) {
            // No explicit header: emit every value in its stored order.
            $rows[] = array_map('strval', array_values($record));
            continue;
        }

        $cells = [];
        foreach ($columns as $column) {
            $cells[] = (string) $record[$column];
        }
        $rows[] = $cells;
    }

    return $rows;
}
|
||||
|
||||
/**
 * Converts entities (or plain arrays) into the nested-array layout expected
 * by the console Table helper: header row first, then one row per entity.
 *
 * Without an explicit header the columns are taken from the first element:
 * the flattened (dot-separated) keys of `toArray()` for objects, the
 * top-level keys for arrays. Array values are rendered as JSON, everything
 * else is cast to string.
 *
 * @param array $entities Entities or arrays to render.
 * @param array $header Optional list of column keys / dot paths.
 * @return array Table rows, or `[[]]` when `$entities` is empty.
 */
private function transformEntitiesIntoTable($entities, $header=[])
{
    if (empty($entities)) {
        return [[]];
    }

    if (!empty($header)) {
        $tableHeader = $header;
    } elseif (is_array($entities[0])) {
        $tableHeader = array_keys($entities[0]);
    } else {
        $tableHeader = array_keys(Hash::flatten($entities[0]->toArray()));
    }

    $tableContent = [];
    foreach ($entities as $entity) {
        $row = [];
        foreach ($tableHeader as $key) {
            // Try the literal key first, then resolve it as a dot path so the
            // flattened headers of nested data map back to their values.
            $value = $entity[$key] ?? Hash::get($entity, $key);
            $row[] = is_array($value) ? json_encode($value) : (string) $value;
        }
        $tableContent[] = $row;
    }

    return array_merge([$tableHeader], $tableContent);
}
|
||||
|
||||
/**
 * Transposes a column-oriented array (`field => [index => value]`) into a
 * row-oriented one (`index => [field => value]`).
 *
 * @param array $data Columns keyed by field name.
 * @return array Rows keyed by the original inner index.
 */
private function invertArray($data)
{
    $rows = [];
    foreach ($data as $field => $column) {
        foreach ($column as $index => $cell) {
            $rows[$index][$field] = $cell;
        }
    }

    return $rows;
}
|
||||
|
||||
/**
 * Massage helper that replaces any incoming value with a fresh UUID.
 *
 * @param mixed $value Ignored; accepted so the method fits the one-argument massage call.
 * @return string Value produced by Text::uuid().
 */
private function genUUID($value)
{
    $uuid = Text::uuid();

    return $uuid;
}
|
||||
|
||||
/**
 * Massage helper that maps null to an empty string and returns every other
 * value unchanged.
 *
 * @param mixed $value Value to normalise.
 * @return mixed Empty string for null, otherwise `$value`.
 */
private function nullToEmptyString($value)
{
    return $value ?? '';
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"source": "/var/www/cerebrate/src/Command/misp_org.json",
|
||||
"finder": {
|
||||
"joinFields": {
|
||||
"squashed": "name",
|
||||
"squashing": "name"
|
||||
},
|
||||
"path": "{n}.Organisation",
|
||||
"type": "exact"
|
||||
},
|
||||
"target": {
|
||||
"model": "Organisations",
|
||||
"squashedField": "uuid"
|
||||
},
|
||||
"squashingData": {
|
||||
"squashingField": "uuid",
|
||||
"massage": "validateUUID"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue