1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
<?php
require_once __DIR__.'/common.php';
// Raise PHP's memory limit to 3072 MB for the duration of this script.
// NOTE(review): presumably needed because whole decoded JSON files and whole
// downloaded document bodies are held in memory below — confirm the figure.
ini_set('memory_limit', '3072M');
/**
 * Recursively collects the attachments of a message-like array, without duplicates.
 *
 * Takes every entry of `$obj['attachments']`, then repeats the collection for
 * every entry of `$obj['fwd_messages']`. Attachments whose type is `wall` or
 * `wall_reply` are additionally fed back into this function.
 *
 * An attachment is returned only if it is an array with a `type` key and a
 * payload stored under the key named by that type (for example
 * `['type' => 'doc', 'doc' => [...]]`). Two attachments count as the same one
 * when their type, payload `owner_id` and payload `id` all match; the first
 * occurrence wins. Attachments whose payload has neither `owner_id` nor `id`
 * are therefore de-duplicated by type alone.
 *
 * @param array $obj Decoded message (or nested object) to scan.
 * @return array Unique attachment arrays in first-seen order, indexed from 0.
 */
function findAllAttachments(array $obj): array {
    $list = [];

    if (!empty($obj['attachments']) && is_array($obj['attachments'])) {
        foreach ($obj['attachments'] as $attachment) {
            // Malformed entries cannot be classified, so they are skipped.
            if (!is_array($attachment) || !isset($attachment['type'])) {
                continue;
            }
            $list[] = $attachment;
            if ($attachment['type'] === 'wall' || $attachment['type'] === 'wall_reply') {
                // NOTE(review): the recursion receives the attachment wrapper itself,
                // so nested items are found only when the wrapper carries its own
                // 'attachments' / 'fwd_messages' keys. If the nested items actually
                // live inside the payload ($attachment[$type]), this finds nothing —
                // confirm against real data.
                $list = array_merge($list, findAllAttachments($attachment));
            }
        }
    }

    if (!empty($obj['fwd_messages']) && is_array($obj['fwd_messages'])) {
        foreach ($obj['fwd_messages'] as $fwd_message) {
            if (is_array($fwd_message)) {
                $list = array_merge($list, findAllAttachments($fwd_message));
            }
        }
    }

    // De-duplicate with an explicit "seen" map instead of hidden static state.
    $seen = [];
    $unique = [];
    foreach ($list as $attachment) {
        $type = $attachment['type'];
        if ((!is_string($type) && !is_int($type)) || !isset($attachment[$type])) {
            // No payload under the type key ("weird" entry): drop it.
            continue;
        }
        $attach = $attachment[$type];

        $key = $type.':';
        if (isset($attach['owner_id'])) {
            $key .= $attach['owner_id'].'_';
        }
        if (isset($attach['id'])) {
            // Append the id itself; appending the isset() result made every
            // attachment of one owner look identical.
            $key .= $attach['id'];
        }

        if (isset($seen[$key])) {
            continue;
        }
        $seen[$key] = true;
        $unique[] = $attachment;
    }

    return $unique;
}
// Main script: for every "<digits>.txt" JSON file found in each sub-directory of
// ARCHIVE_DIR/messages/api, collect its attachments, and download every 'doc'
// attachment to ARCHIVE_DIR/messages/docs/<owner_id>_<id>/<title>.
$api_dir = ARCHIVE_DIR.'/messages/api';

// An existing file of exactly this many bytes is deleted and downloaded again.
// NOTE(review): presumably the size of a placeholder/error page saved by an
// earlier bad download — confirm where 56655 comes from.
$stale_file_size = 56655;

$sub_dirs = is_dir($api_dir) ? scandir($api_dir) : false;
if ($sub_dirs === false) {
    fatalError("failed to scandir({$api_dir})");
    $sub_dirs = []; // keeps the loop below safe should fatalError() return
}

foreach ($sub_dirs as $n) {
    if ($n === '.' || $n === '..')
        continue;

    $sub_dir = $api_dir.'/'.$n;
    if (!is_dir($sub_dir))
        continue; // stray regular file next to the sub-directories

    $files = scandir($sub_dir);
    if ($files === false) {
        fwrite(STDERR, "failed to scandir({$sub_dir})\n");
        continue;
    }

    foreach ($files as $file) {
        if (!preg_match('/^\d+\.txt$/', $file))
            continue;

        $path = $sub_dir.'/'.$file;
        $json = file_get_contents($path);
        $obj = ($json === false) ? null : json_decode($json, true);
        unset($json);
        if (!is_array($obj)) {
            // findAllAttachments() requires an array; unreadable or invalid JSON is skipped.
            fwrite(STDERR, "failed to read or decode {$path}\n");
            continue;
        }

        $docs = array_filter(findAllAttachments($obj), function($a) {
            return $a['type'] === 'doc';
        });

        foreach ($docs as $doc) {
            $doc = $doc['doc']; // the payload sits under the key named by the type
            if (!isset($doc['owner_id'], $doc['id'], $doc['url'])) {
                fwrite(STDERR, "skipping incomplete doc attachment in {$path}\n");
                continue;
            }

            $doc_id = $doc['owner_id'].'_'.$doc['id'];
            $doc_dir = ARCHIVE_DIR.'/messages/docs/'.$doc_id;
            if (!file_exists($doc_dir)) {
                if (!mkdir($doc_dir, 0755, true))
                    fatalError("failed to mkdir({$doc_dir})");
            }

            // The title comes from downloaded data: neutralise path separators and
            // NUL bytes so it cannot escape $doc_dir. Titles without those
            // characters are used unchanged.
            $title = (isset($doc['title']) && is_scalar($doc['title'])) ? (string)$doc['title'] : '';
            $file_name = str_replace(['/', '\\', "\0"], '_', $title);
            if ($file_name === '' || $file_name === '.' || $file_name === '..')
                $file_name = $doc_id;
            $doc_file = $doc_dir.'/'.$file_name;

            if (file_exists($doc_file)) {
                if (filesize($doc_file) !== $stale_file_size) {
                    echo "$doc_id already exists\n";
                    continue;
                }
                unlink($doc_file);
            }

            list($code, $body) = httpGet($doc['url']);
            $failure = null;
            if ($code != 200) {
                $failure = "failed to download {$doc_id} ({$doc['url']})\n";
            } else {
                $written = file_put_contents($doc_file, $body);
                if ($written === false)
                    $failure = "failed to write {$doc_file}\n";
            }
            unset($body);

            if ($failure !== null) {
                // fwrite(), not fprintf(): URLs may contain '%', which fprintf()
                // would treat as format specifiers.
                fwrite(STDERR, $failure);
                // Remove the directory only when nothing else is stored in it.
                $left = scandir($doc_dir);
                if ($left !== false && count($left) <= 2)
                    rmdir($doc_dir);
                continue;
            }

            echo "$doc_id saved, ".$written." bytes\n";
        }
    }
}
|