-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathurisearch.js
More file actions
107 lines (87 loc) · 3.11 KB
/
urisearch.js
File metadata and controls
107 lines (87 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
var request = require('request'),
cheerio = require('cheerio'),
url = require('url'),
path = require('path');
// Crawl target: every request goes to protocol + domain, and each page body
// is searched for searchStr.
var protocol = 'http://';
var domain = 'larvit.se';
var searchStr = 'Linux';

// Crawl state, keyed by URI. The crawl is seeded with the site root only;
// every field other than `visited` starts out undefined.
var uris = {
	'/': {
		'visited': false,
		'body': undefined,
		'stringFound': undefined,
		'error': undefined,
		'statusCode': undefined
	}
};
/**
 * Request every URI in the module-level `uris` map that has not been
 * requested yet.
 *
 * Each response is searched for `searchStr`, its same-domain links are added
 * to `uris`, and the function is called again whenever new URIs were found.
 * Once every URI has been settled (searched, failed, or answered with a
 * non-200 status) the summary is printed exactly once.
 *
 * Reads the module-level `protocol`, `domain`, `searchStr` and `uris`;
 * mutates `uris`. Takes no arguments and returns nothing.
 */
function investigateUris() {
	// forEach gives every iteration its own `uri` binding, so the asynchronous
	// callback below still refers to the URI it was started for. A shared
	// `var` loop variable would hold the last key by the time it runs.
	Object.keys(uris).forEach(function (uri) {
		var entry = uris[uri];

		if ( ! uri || entry.visited !== false)
			return;

		// Must be done before the request, so no other request is tried at the same time for the same URI
		entry.visited = true;

		// Keys are stored without a leading slash, except for the root '/'
		var fullUri = protocol + domain + (uri === '/' ? uri : '/' + uri);

		request(fullUri, function (error, response, body) {
			handleCrawlResponse(uri, error, response, body);
		});
	});
}

/**
 * Record the outcome of one request in `uris[uri]` and continue the crawl.
 *
 * @param {string} uri - Key in `uris` the request was started for.
 * @param {*} error - Error passed to the request callback, if any.
 * @param {Object} [response] - Response object; undefined when the request failed.
 * @param {*} body - Response body; only searched when it is a string.
 */
function handleCrawlResponse(uri, error, response, body) {
	var entry = uris[uri];

	// On a failed request `response` is undefined, so it must not be touched.
	if (error || ! response) {
		entry.error = error;
		entry.stringFound = false; // settled: nothing to search
		printResultIfDone();
		return;
	}

	entry.statusCode = response.statusCode;

	if (response.statusCode !== 200) {
		entry.stringFound = false; // settled: nothing to search
		printResultIfDone();
		return;
	}

	// The body itself is not stored; it becomes too large too often (binary images etc)
	var text = typeof body === 'string' ? body : '';

	// !== -1 rather than > 0, so a match at the very start of the body counts
	entry.stringFound = text.indexOf(searchStr) !== -1;

	// Relative links are resolved against the path that was actually requested;
	// fall back to the map key when the response does not expose it.
	var pagePath = (response.req && response.req.path) || (uri === '/' ? uri : '/' + uri);

	if (addLinkedUris(protocol + domain + pagePath, text))
		investigateUris();

	printResultIfDone();
}

/**
 * Add every same-domain http(s) link found in `body` to `uris`.
 *
 * @param {string} pageUrl - Absolute URL of the page, used as the base for relative links.
 * @param {string} body - Markup of the page.
 * @returns {boolean} true when at least one new URI was added.
 */
function addLinkedUris(pageUrl, body) {
	var $ = cheerio.load(body);
	var added = false;

	$('[href], [src]').each(function (i, element) {
		var target = $(element).attr('href') || $(element).attr('src');

		if (typeof target !== 'string' || ! target.length)
			return;

		// URL resolution (not filesystem path resolution) handles relative,
		// absolute and protocol-relative links on every platform.
		var resolved = url.parse(url.resolve(pageUrl, target));

		// Skips javascript:, mailto:, data: and similar targets
		if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:')
			return;

		if (resolved.host !== domain || ! resolved.pathname)
			return;

		// Drop the leading slash (it is added again when the request is built)
		var key = resolved.pathname.substr(1);

		if (resolved.query)
			key += '?' + resolved.query;

		// A link to the site root must map to the existing '/' entry, not to ''
		if ( ! key)
			key = '/';

		// Own-property check, so keys such as 'constructor' are not mistaken for known URIs
		if ( ! Object.prototype.hasOwnProperty.call(uris, key)) {
			// stringFound stays undefined until the URI has been settled; that is
			// what the completion check looks for.
			uris[key] = {'visited': false, 'body': undefined, 'stringFound': undefined, 'error': undefined, 'statusCode': undefined};
			added = true;
		}
	});

	return added;
}

/**
 * Print the crawl summary once no URI is waiting for a response.
 * Does nothing while any non-empty key in `uris` still has an undefined
 * `stringFound`.
 */
function printResultIfDone() {
	var keys = Object.keys(uris);

	// Empty keys are never requested by investigateUris(), so they must not
	// keep the crawl pending forever.
	var pending = keys.some(function (key) {
		return key && uris[key].stringFound === undefined;
	});

	if (pending)
		return;

	console.log('All URIs searched:');
	console.log('==================');
	keys.forEach(function (key) {
		console.log(key);
	});

	console.log('\nURIs containing the search string:');
	keys.forEach(function (key) {
		if (uris[key].stringFound)
			console.log(key);
	});
}
// Start the crawl; at this point `uris` holds only the seed entry '/'.
investigateUris();