diff --git a/netlify.toml b/netlify.toml
index b264053..432f87b 100644
--- a/netlify.toml
+++ b/netlify.toml
@@ -124,6 +124,11 @@
   to = "/.netlify/functions/tech-stack"
   status = 301
   force = true
+[[redirects]]
+  from = "/sitemap"
+  to = "/.netlify/functions/sitemap"
+  status = 301
+  force = true
 
 # For router history mode, ensure pages land on index
 [[redirects]]
diff --git a/package.json b/package.json
index ff736fb..b0e35d3 100644
--- a/package.json
+++ b/package.json
@@ -33,7 +33,8 @@
     "tsparticles": "^2.0.6",
     "typescript": "^4.7.3",
     "wappalyzer": "^6.10.63",
-    "web-vitals": "^2.1.4"
+    "web-vitals": "^2.1.4",
+    "xml2js": "^0.6.0"
   },
   "scripts": {
     "start": "react-scripts start",
diff --git a/server/lambda/sitemap.js b/server/lambda/sitemap.js
new file mode 100644
index 0000000..8b57f01
--- /dev/null
+++ b/server/lambda/sitemap.js
@@ -0,0 +1,73 @@
+const axios = require('axios');
+const xml2js = require('xml2js');
+
+/**
+ * Netlify function: finds a site's sitemap via robots.txt and returns it as JSON.
+ *
+ * Reads the target from the `url` query parameter, fetches `<origin>/robots.txt`,
+ * follows the `Sitemap:` directive found there and parses that XML with xml2js.
+ *
+ * @param {object} event - Function event; `queryStringParameters.url` is the target site.
+ * @returns {Promise<{statusCode: number, body: string}>} 200 with the parsed sitemap,
+ *   400 for a missing or invalid url, 404 when robots.txt lists no sitemap,
+ *   500 when a request or the XML parsing fails.
+ */
+exports.handler = async (event) => {
+  const rawUrl = ((event.queryStringParameters || {}).url || '').trim();
+  if (!rawUrl) {
+    return {
+      statusCode: 400,
+      body: JSON.stringify({ error: 'Missing url query parameter' }),
+    };
+  }
+
+  let origin;
+  try {
+    // Keep a scheme the caller supplied; only default to http:// when there is none.
+    // robots.txt lives at the site root, so any path on the input is dropped.
+    const withScheme = /^https?:\/\//i.test(rawUrl) ? rawUrl : `http://${rawUrl}`;
+    origin = new URL(withScheme).origin;
+  } catch (error) {
+    return {
+      statusCode: 400,
+      body: JSON.stringify({ error: `Invalid url: ${error.message}` }),
+    };
+  }
+
+  let sitemapUrl;
+
+  try {
+    // Fetch robots.txt
+    const robotsRes = await axios.get(`${origin}/robots.txt`);
+    const robotsTxt = String(robotsRes.data).split(/\r?\n/);
+
+    for (const line of robotsTxt) {
+      // Field names are case-insensitive and the space after the colon is optional
+      const match = /^\s*sitemap:\s*(\S+)/i.exec(line);
+      if (match) {
+        sitemapUrl = match[1];
+      }
+    }
+
+    if (!sitemapUrl) {
+      return {
+        statusCode: 404,
+        body: JSON.stringify({ error: 'Sitemap not found in robots.txt' }),
+      };
+    }
+
+    // Fetch sitemap
+    const sitemapRes = await axios.get(sitemapUrl);
+    const sitemap = await xml2js.parseStringPromise(sitemapRes.data);
+
+    return {
+      statusCode: 200,
+      body: JSON.stringify(sitemap),
+    };
+  } catch (error) {
+    return {
+      statusCode: 500,
+      body: JSON.stringify({ error: error.message }),
+    };
+  }
+};
diff --git a/src/components/Results/Sitemap.tsx b/src/components/Results/Sitemap.tsx
new file mode 100644
index 0000000..41f7e7e
--- /dev/null
+++ b/src/components/Results/Sitemap.tsx
@@ -0,0 +1,58 @@
+
+import { Card } from 'components/Form/Card';
+import Heading from 'components/Form/Heading';
+import Row, { ExpandableRow } from 'components/Form/Row';
+import colors from 'styles/colors';
+
+const cardStyles = `
+  max-height: 50rem;
+  overflow-y: auto;
+  a {
+    color: ${colors.primary};
+  }
+  small {
+    margin-top: 1rem;
+    opacity: 0.5;
+    display: block;
+    a { color: ${colors.primary}; }
+  }
+`;
+
+const SitemapCard = (props: {data: any, title: string, actionButtons: any }): JSX.Element => {
+  console.log(props.data);
+  const normalSiteMap = props.data.url || props.data.urlset?.url || null;
+  const siteMapIndex = props.data.sitemapindex?.sitemap || null;
+
+  const makeExpandableRowData = (site: any) => {
+    const results = [];
+    if (site.lastmod) { results.push({lbl: 'Last Modified', val: site.lastmod[0]}); }
+    if (site.changefreq) { results.push({lbl: 'Change Frequency', val: site.changefreq[0]}); }
+    if (site.priority) { results.push({lbl: 'Priority', val: site.priority[0]}); }
+    return results;
+  };
+
+  const getPathFromUrl
= (url: string) => { + const urlObj = new URL(url); + return urlObj.pathname; + }; + + return ( + + { + normalSiteMap && normalSiteMap.map((subpage: any, index: number) => { + return () + }) + } + { siteMapIndex &&

+ This site returns a sitemap index, which is a list of sitemaps. +

} + { + siteMapIndex && siteMapIndex.map((subpage: any, index: number) => { + return ({getPathFromUrl(subpage.loc[0])}); + }) + } +
+ ); +} + +export default SitemapCard; diff --git a/src/components/misc/ProgressBar.tsx b/src/components/misc/ProgressBar.tsx index 8a20bb3..2b04cac 100644 --- a/src/components/misc/ProgressBar.tsx +++ b/src/components/misc/ProgressBar.tsx @@ -190,7 +190,7 @@ const jobNames = [ 'domain-lookup', 'tech-stack', 'hosts', - 'lighthouse', + 'quality', 'cookies', 'server-info', 'redirects', @@ -200,8 +200,9 @@ const jobNames = [ 'ports', 'screenshot', 'txt-records', + 'sitemap', 'hsts', - 'whois', + // 'whois', 'features', 'carbon', 'trace-route', diff --git a/src/pages/Results.tsx b/src/pages/Results.tsx index 8015c4f..83686b8 100644 --- a/src/pages/Results.tsx +++ b/src/pages/Results.tsx @@ -35,6 +35,7 @@ import CarbonFootprintCard from 'components/Results/CarbonFootprint'; import SiteFeaturesCard from 'components/Results/SiteFeatures'; import DnsSecCard from 'components/Results/DnsSec'; import HstsCard from 'components/Results/Hsts'; +import SitemapCard from 'components/Results/Sitemap'; import DomainLookup from 'components/Results/DomainLookup'; import DnsServerCard from 'components/Results/DnsServer'; import TechStackCard from 'components/Results/TechStack'; @@ -60,24 +61,11 @@ import { const ResultsOuter = styled.div` display: flex; flex-direction: column; - .my-masonry-grid { - display: -webkit-box; /* Not needed if autoprefixing */ - display: -ms-flexbox; /* Not needed if autoprefixing */ + .masonry-grid { display: flex; - // margin: 1rem; - // margin-left: -30px; /* gutter size offset */ width: auto; } - .my-masonry-grid_column { - // margin-left: 30px; /* gutter size */ - background-clip: padding-box; - } - - /* Style your items */ - .my-masonry-grid_column > div { /* change div to reference your elements you put in */ - // background: grey; - // margin-bottom: 30px; -} + .masonry-grid-col section { margin: 1rem 0.5rem; } `; const ResultsContent = styled.section` @@ -165,7 +153,6 @@ const Results = (): JSX.Element => { }, []); const parseJson = (response: 
Response): Promise => { - // return response.json() return new Promise((resolve) => { if (response.ok) { response.json() @@ -181,20 +168,6 @@ const Results = (): JSX.Element => { } }); }; - - - - // const parseJson = (response: Response): Promise => { - // if (response.status >= 400) { - // return new Promise((resolve) => resolve({ error: `Failed to fetch data: ${response.statusText}` })); - // } - // return new Promise((resolve) => { - // if (!response) { resolve({ error: 'No response from server' }); } - // response.json() - // .catch(error => resolve({ error: `Failed to process response, likely due to Netlify's 10-sec limit on lambda functions. Error: ${error}`})); - // }); - // }; - useEffect(() => { if (!addressType || addressType === 'empt') { @@ -312,16 +285,6 @@ const Results = (): JSX.Element => { .then(res => applyWhoIsResults(res)), }); - // Fetch and parse built-with results - // const [technologyResults, updateTechnologyResults] = useMotherHook({ - // jobId: 'built-with', - // updateLoadingJobs, - // addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly }, - // fetchRequest: () => fetch(`https://api.builtwith.com/v21/api.json?KEY=${keys.builtWith}&LOOKUP=${address}`) - // .then(res => parseJson(res)) - // .then(res => makeTechnologies(res)), - // }); - // Fetches DNS TXT records const [txtRecordResults, updateTxtRecordResults] = useMotherHook({ jobId: 'txt-records', @@ -378,6 +341,14 @@ const Results = (): JSX.Element => { fetchRequest: () => fetch(`/check-hsts?url=${address}`).then(res => parseJson(res)), }); + // Get a websites listed pages, from sitemap + const [sitemapResults, updateSitemapResults] = useMotherHook({ + jobId: 'sitemap', + updateLoadingJobs, + addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly }, + fetchRequest: () => fetch(`/sitemap?url=${address}`).then(res => parseJson(res)), + }); + // Get site features from BuiltWith const [siteFeaturesResults, updateSiteFeaturesResults] = useMotherHook({ 
jobId: 'features', @@ -449,12 +420,13 @@ const Results = (): JSX.Element => { { id: 'dns', title: 'DNS Records', result: dnsResults, Component: DnsRecordsCard, refresh: updateDnsResults }, { id: 'hosts', title: 'Host Names', result: shoadnResults?.hostnames, Component: HostNamesCard, refresh: updateShodanResults }, { id: 'tech-stack', title: 'Tech Stack', result: techStackResults, Component: TechStackCard, refresh: updateTechStackResults }, - { id: 'lighthouse', title: 'Performance', result: lighthouseResults, Component: LighthouseCard, refresh: updateLighthouseResults }, + { id: 'quality', title: 'Quality Summary', result: lighthouseResults, Component: LighthouseCard, refresh: updateLighthouseResults }, { id: 'cookies', title: 'Cookies', result: cookieResults, Component: CookiesCard, refresh: updateCookieResults }, { id: 'trace-route', title: 'Trace Route', result: traceRouteResults, Component: TraceRouteCard, refresh: updateTraceRouteResults }, { id: 'server-info', title: 'Server Info', result: shoadnResults?.serverInfo, Component: ServerInfoCard, refresh: updateShodanResults }, { id: 'redirects', title: 'Redirects', result: redirectResults, Component: RedirectsCard, refresh: updateRedirectResults }, { id: 'robots-txt', title: 'Crawl Rules', result: robotsTxtResults, Component: RobotsTxtCard, refresh: updateRobotsTxtResults }, + { id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults }, { id: 'dnssec', title: 'DNSSEC', result: dnsSecResults, Component: DnsSecCard, refresh: updateDnsSecResults }, { id: 'status', title: 'Server Status', result: serverStatusResults, Component: ServerStatusCard, refresh: updateServerStatusResults }, { id: 'ports', title: 'Open Ports', result: portsResults, Component: OpenPortsCard, refresh: updatePortsResults }, @@ -526,8 +498,8 @@ const Results = (): JSX.Element => { + className="masonry-grid" + columnClassName="masonry-grid-col"> { resultCardData.map(({ id, title, result, 
refresh, Component }, index: number) => ( (result && !result.error) ? ( diff --git a/yarn.lock b/yarn.lock index f2baa7b..e0e1131 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8699,7 +8699,7 @@ sass-loader@^12.3.0: klona "^2.0.4" neo-async "^2.6.2" -sax@~1.2.4: +sax@>=0.6.0, sax@~1.2.4: version "1.2.4" resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9" integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw== @@ -10552,6 +10552,19 @@ xml-name-validator@^3.0.0: resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a" integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw== +xml2js@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.0.tgz#07afc447a97d2bd6507a1f76eeadddb09f7a8282" + integrity sha512-eLTh0kA8uHceqesPqSE+VvO1CDDJWMwlQfB6LuN6T8w6MaDJ8Txm8P7s5cHD0miF0V+GGTZrDQfxPZQVsur33w== + dependencies: + sax ">=0.6.0" + xmlbuilder "~11.0.0" + +xmlbuilder@~11.0.0: + version "11.0.1" + resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" + integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA== + xmlchars@^2.2.0: version "2.2.0" resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"