2021-08-08 07:01:37 -04:00
|
|
|
const { promises: fs } = require("fs");
|
|
|
|
const childProcess = require("child_process");
|
|
|
|
const path = require("path");
|
2020-01-07 20:35:39 -05:00
|
|
|
|
|
|
|
// Pages to download, keyed by test name. Each key doubles as the name of the
// file written into the "tests" directory, and each value is the URL whose
// HTML is fetched for it.
const tests = {
  Amazon: "https://www.amazon.com/",
  BBC: "https://www.bbc.co.uk/",
  Bootstrap: "https://getbootstrap.com/docs/3.4/css/",
  Bing: "https://www.bing.com/",
  "Coding Horror": "https://blog.codinghorror.com/",
  "ECMA-262": "https://www.ecma-international.org/ecma-262/10.0/index.html",
  Google: "https://www.google.com/",
  "Hacker News": "https://news.ycombinator.com/",
  "NY Times": "https://www.nytimes.com/",
  Reddit: "https://www.reddit.com/",
  "Stack Overflow": "https://www.stackoverflow.com/",
  Twitter: "https://twitter.com/",
  Wikipedia: "https://en.wikipedia.org/wiki/Soil",
};
|
|
|
|
|
2021-08-08 07:01:37 -04:00
|
|
|
/**
 * Downloads the document at `url` by spawning `curl`.
 *
 * @param {string} name - Label returned unchanged as the first element of the result.
 * @param {string} url - Address passed to curl as its final argument.
 * @returns {Promise<[string, string]>} Resolves with `[name, stdout]`. Rejects with
 *   the error reported by `execFile` (curl missing, non-zero exit, output over the
 *   buffer limit), or with an `Error` whose message starts with `stderr: ` when curl
 *   wrote anything to stderr.
 */
const fetchTest = (name, url) =>
  new Promise((resolve, reject) => {
    // Use curl to follow redirects without needing a Node.js library.
    childProcess.execFile(
      "curl",
      [
        "-H",
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; rv:71.0) Gecko/20100101 Firefox/71.0",
        "-H",
        "Accept: */*",
        // -f: fail on HTTP errors, -L: follow redirects,
        // -S: still print errors, -s: suppress the progress meter.
        "-fLSs",
        url,
      ],
      // execFile kills the child and reports an error once buffered output
      // exceeds maxBuffer (1 MiB by default in current Node.js releases), which
      // large pages can exceed. Raise the ceiling well above any expected page.
      { maxBuffer: 256 * 1024 * 1024 },
      (error, stdout, stderr) => {
        if (error) {
          return reject(error);
        }
        // With -sS curl only writes to stderr when something went wrong.
        if (stderr) {
          return reject(new Error(`stderr: ${stderr}`));
        }
        resolve([name, stdout]);
      }
    );
  });
|
|
|
|
|
|
|
|
/**
 * Repairs known markup defects found in specific fetched pages.
 *
 * The string-pattern replacements touch only the first occurrence; the regular
 * expression replacement is global. Input without any of the patterns is
 * returned unchanged.
 *
 * @param {string} html - Raw page source as downloaded.
 * @returns {string} The page source with the fixes applied.
 */
const applyHtmlFixes = (html) =>
  html
    // Fix early termination of conditional comment in Amazon.
    .replace("--></style>\n<![endif]-->", "</style>\n<![endif]-->")
    // Fix closing of void tag in Amazon.
    .replace(/><\/hr>/g, "/>")
    // Fix extra '</div>' in BBC.
    .replace(
      "</a></span></small></div></div></div></footer>",
      "</a></span></small></div></div></footer>"
    )
    // Fix broken attribute value in Stack Overflow.
    .replace('height=151"', 'height="151"');

// Entry point: empty the "tests" directory next to this script, download every
// page listed in `tests`, and write each one (after fix-ups) to a file named
// after its test.
(async () => {
  const testsDir = path.join(__dirname, "tests");

  // Make sure the output directory exists so the first run does not fail with
  // ENOENT when listing it.
  await fs.mkdir(testsDir, { recursive: true });

  // Remove the results of any previous run.
  const existing = await fs.readdir(testsDir);
  await Promise.all(existing.map((e) => fs.unlink(path.join(testsDir, e))));

  // Finish every download before post-processing: that work is synchronous and
  // can take so long that servers drop still-open connections due to inactivity.
  const pages = await Promise.all(
    Object.entries(tests).map(([name, url]) => fetchTest(name, url))
  );

  for (const [name, html] of pages) {
    await fs.writeFile(path.join(testsDir, name), applyHtmlFixes(html));
  }
})().catch((error) => {
  console.error(error);
  // Report the failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});
|