-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinternet_to_database.php
More file actions
113 lines (94 loc) · 3.59 KB
/
internet_to_database.php
File metadata and controls
113 lines (94 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
<?php
require_once __DIR__ . '/../vendor/autoload.php';
/**
* Demonstration how some data from internet can be downloaded and kept in sync.
*
* Demo downloads country codes and their ISO-3166-2 codes.
* The source is a wiki page https://en.wikipedia.org/wiki/ISO_3166-2
* It contains the list of countries already sorted by code.
*/
$pdo = new PDO('sqlite:demo.sqlite');
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$pdo->query('CREATE TABLE IF NOT EXISTS countries (code TEXT NOT NULL, name TEXT NOT NULL)');
/**
* Generator returns country names from internet.
* Countries are ordered by code and then by name.
* The returned string is in format '<code><name>' ie the two first characters are the country code.
*/
function source(): Generator
{
$ch = curl_init();
try {
curl_setopt($ch, CURLOPT_URL, 'https://en.wikipedia.org/wiki/ISO_3166-2');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$content = curl_exec($ch);
if ($content === false) {
throw new Exception('Could not download country names and codes from wiki page. ' . curl_error($ch));
}
} finally {
curl_close($ch);
}
$dom = new DOMDocument();
$use_errors = libxml_use_internal_errors(true);
try {
if ($dom->loadHTML($content, LIBXML_NOWARNING) === false) {
throw new Exception('Source data loading failed. Could not parse the wiki page. ' . libxml_get_last_error());
}
} finally {
libxml_clear_errors();
libxml_use_internal_errors($use_errors);
}
$xpath = new DOMXPath($dom);
// we know the first one is the table we are interested in it.
$tables = $xpath->query('//table');
$table = $tables->item(0);
$rows = $xpath->query("./tbody/tr/td/a[starts-with(@href, '/wiki/ISO_3166-2:')]/parent::td/parent::tr", $table);
if ($rows === false or $rows->length === 0) { // something went wrong. Terminate in order to prevent deleting current target.
throw new \Exception('Source data loading failed. Could not find companies data in wiki page content.');
}
foreach ($rows as $row) {
$columns = $row->getElementsByTagName('td');
$code = trim($columns[0]->nodeValue);
$name = trim($columns[1]->nodeValue);
yield $code . $name;
}
}
/**
* Generator returning country names from target database.
* Countries are ordered by code and then by name.
* The returned string is in format '<code><name>' ie the two first characters are the country code.
*/
function target($pdo): Generator
{
$sql = 'SELECT code, name FROM countries ORDER BY code ASC, name ASC';
foreach ($pdo->query($sql) as $row) {
yield $row['code'] . $row['name'];
}
}
$sthAdd = $pdo->prepare('INSERT INTO countries (code, name) VALUES (?, ?)');
$sthRemove = $pdo->prepare('DELETE FROM countries WHERE code=?');
$counts = [
'added' => 0,
'removed' => 0,
];
$synchronization = new \Raigu\OrderedListsSynchronization\Synchronization();
$synchronization(
source(),
target($pdo),
function ($element) use ($sthAdd, &$counts) {
$code = substr($element, 0, 2);
$name = substr($element, 2);
echo "+ {$code}" . PHP_EOL;
$counts['added'] += 1;
$sthAdd->execute([$code, $name]);
},
function ($element) use ($sthRemove, &$counts) {
$code = substr($element, 0, 2);
echo "- {$code}" . PHP_EOL;
$counts['removed'] += 1;
$sthRemove->execute([$code]);
}
);
echo "Added: {$counts['added']}" . PHP_EOL;
echo "Removed: {$counts['removed']}" . PHP_EOL;