Implements global page rank widget

This commit is contained in:
Alicia Sykes
2023-08-14 22:15:00 +01:00
parent fe1e74a22f
commit 8d4c09ffd9
4 changed files with 194 additions and 0 deletions

70
api/legacy-rank.js Normal file
View File

@@ -0,0 +1,70 @@
const axios = require('axios');
const unzipper = require('unzipper');
const csv = require('csv-parser');
const fs = require('fs');
const middleware = require('./_common/middleware');
// Should also work with the following sources:
// https://www.domcop.com/files/top/top10milliondomains.csv.zip
// https://tranco-list.eu/top-1m.csv.zip
// https://www.domcop.com/files/top/top10milliondomains.csv.zip
// https://radar.cloudflare.com/charts/LargerTopDomainsTable/attachment?id=525&top=1000000
// https://statvoo.com/dl/top-1million-sites.csv.zip
const FILE_URL = 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip';
const TEMP_FILE_PATH = '/tmp/top-1m.csv';
const handler = async (url) => {
let domain = null;
try {
domain = new URL(url).hostname;
} catch (e) {
throw new Error('Invalid URL');
}
// Download and unzip the file if not in cache
if (!fs.existsSync(TEMP_FILE_PATH)) {
const response = await axios({
method: 'GET',
url: FILE_URL,
responseType: 'stream'
});
await new Promise((resolve, reject) => {
response.data
.pipe(unzipper.Extract({ path: '/tmp' }))
.on('close', resolve)
.on('error', reject);
});
}
// Parse the CSV and find the rank
return new Promise((resolve, reject) => {
const csvStream = fs.createReadStream(TEMP_FILE_PATH)
.pipe(csv({
headers: ['rank', 'domain'],
}))
.on('data', (row) => {
if (row.domain === domain) {
csvStream.destroy();
resolve({
domain: domain,
rank: row.rank,
isFound: true,
});
}
})
.on('end', () => {
resolve({
skipped: `Skipping, as ${domain} is not present in the Umbrella top 1M list.`,
domain: domain,
isFound: false,
});
})
.on('error', reject);
});
};
exports.handler = middleware(handler);

24
api/rank.js Normal file
View File

@@ -0,0 +1,24 @@
const axios = require('axios');
const middleware = require('./_common/middleware');
const handler = async (url) => {
const domain = url ? new URL(url).hostname : null;
if (!domain) throw new Error('Invalid URL');
try {
const auth = process.env.TRANCO_API_KEY ? // Auth is optional.
{ auth: { username: process.env.TRANCO_USERNAME, password: process.env.TRANCO_API_KEY } }
: {};
const response = await axios.get(
`https://tranco-list.eu/api/ranks/domain/${domain}`, { timeout: 2000 }, auth,
);
if (!response.data || !response.data.ranks || response.data.ranks.length === 0) {
return { skipped: `Skipping, as ${domain} isn't ranked in the top 100 million sites yet.`};
}
return response.data;
} catch (error) {
return { error: `Unable to fetch rank, ${error.message}` };
}
};
exports.handler = middleware(handler);