From df24445ac691a5da195f18a3ca3f214e5928e38a Mon Sep 17 00:00:00 2001 From: Alicia Sykes Date: Sat, 29 Jul 2023 09:38:25 +0100 Subject: [PATCH] Re-wrote sitemap lambda function --- api/sitemap.js | 66 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/api/sitemap.js b/api/sitemap.js index 8b57f01..ba2453a 100644 --- a/api/sitemap.js +++ b/api/sitemap.js @@ -2,40 +2,60 @@ const axios = require('axios'); const xml2js = require('xml2js'); exports.handler = async (event) => { - const baseUrl = event.queryStringParameters.url.replace(/^(?:https?:\/\/)?/i, ""); - const url = baseUrl.startsWith('http') ? baseUrl : `http://${baseUrl}`; - let sitemapUrl; + const url = event.queryStringParameters.url; + let sitemapUrl = `${url}/sitemap.xml`; try { - // Fetch robots.txt - const robotsRes = await axios.get(`${url}/robots.txt`); - const robotsTxt = robotsRes.data.split('\n'); + // Try to fetch sitemap directly + let sitemapRes; + try { + sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 }); + } catch (error) { + if (error.response && error.response.status === 404) { + // If sitemap not found, try to fetch it from robots.txt + const robotsRes = await axios.get(`${url}/robots.txt`, { timeout: 5000 }); + const robotsTxt = robotsRes.data.split('\n'); - for (let line of robotsTxt) { - if (line.startsWith('Sitemap:')) { - sitemapUrl = line.split(' ')[1]; + for (let line of robotsTxt) { + if (line.toLowerCase().startsWith('sitemap:')) { + sitemapUrl = line.split(' ')[1].trim(); + break; + } + } + + if (!sitemapUrl) { + return { + statusCode: 404, + body: JSON.stringify({ skipped: 'No sitemap found' }), + }; + } + + sitemapRes = await axios.get(sitemapUrl, { timeout: 5000 }); + } else { + throw error; // If other error, throw it } } - if (!sitemapUrl) { - return { - statusCode: 404, - body: JSON.stringify({ error: 'Sitemap not found in robots.txt' }), - }; - } - - // Fetch sitemap - const sitemapRes = await axios.get(sitemapUrl); - const sitemap = await xml2js.parseStringPromise(sitemapRes.data); + const parser = new xml2js.Parser(); + const sitemap = await parser.parseStringPromise(sitemapRes.data); return { statusCode: 200, body: JSON.stringify(sitemap), }; } catch (error) { - return { - statusCode: 500, - body: JSON.stringify({ error: error.message }), - }; + // If error occurs + console.log(error.message); + if (error.code === 'ECONNABORTED') { + return { + statusCode: 500, + body: JSON.stringify({ error: 'Request timed out' }), + }; + } else { + return { + statusCode: 500, + body: JSON.stringify({ error: error.message }), + }; + } } };