From 6a854af79e1a4a1f8181bc595ac6506b892b233c Mon Sep 17 00:00:00 2001 From: Alicia Sykes Date: Thu, 27 Jul 2023 22:06:40 +0100 Subject: [PATCH 1/5] Fetch and display externally linked content hrefs --- api/content-links.js | 48 +++++++++++++ package.json | 1 + src/components/Results/ContentLinks.tsx | 89 +++++++++++++++++++++++ src/components/misc/ProgressBar.tsx | 1 + src/pages/Results.tsx | 81 ++++++--------------- yarn.lock | 95 ++++++++++++++++++++++++- 6 files changed, 255 insertions(+), 60 deletions(-) create mode 100644 api/content-links.js create mode 100644 src/components/Results/ContentLinks.tsx diff --git a/api/content-links.js b/api/content-links.js new file mode 100644 index 0000000..34e4205 --- /dev/null +++ b/api/content-links.js @@ -0,0 +1,48 @@ +const axios = require('axios'); +const cheerio = require('cheerio'); +const urlLib = require('url'); + +exports.handler = async (event, context) => { + let url = event.queryStringParameters.url; + + // Check if url includes protocol + if (!url.startsWith('http://') && !url.startsWith('https://')) { + url = 'http://' + url; + } + + try { + const response = await axios.get(url); + const html = response.data; + const $ = cheerio.load(html); + const internalLinksMap = new Map(); + const externalLinksMap = new Map(); + + $('a[href]').each((i, link) => { + const href = $(link).attr('href'); + const absoluteUrl = urlLib.resolve(url, href); + + if (absoluteUrl.startsWith(url)) { + const count = internalLinksMap.get(absoluteUrl) || 0; + internalLinksMap.set(absoluteUrl, count + 1); + } else if (href.startsWith('http://') || href.startsWith('https://')) { + const count = externalLinksMap.get(absoluteUrl) || 0; + externalLinksMap.set(absoluteUrl, count + 1); + } + }); + + // Convert maps to sorted arrays + const internalLinks = [...internalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]); + const externalLinks = [...externalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]); + + return { + statusCode: 200, + body: JSON.stringify({ internal: internalLinks, external: externalLinks }), + }; + } catch (error) { + console.log(error); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Failed fetching data' }), + }; + } +}; diff --git a/package.json b/package.json index 1471758..b25c5cb 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "@types/react-simple-maps": "^3.0.0", "@types/styled-components": "^5.1.26", "axios": "^1.4.0", + "cheerio": "^1.0.0-rc.12", "chrome-aws-lambda": "^10.1.0", "flatted": "^3.2.7", "follow-redirects": "^1.15.2", diff --git a/src/components/Results/ContentLinks.tsx b/src/components/Results/ContentLinks.tsx new file mode 100644 index 0000000..d2ea0a5 --- /dev/null +++ b/src/components/Results/ContentLinks.tsx @@ -0,0 +1,89 @@ +import { Card } from 'components/Form/Card'; +import Row from 'components/Form/Row'; +import Heading from 'components/Form/Heading'; +import colors from 'styles/colors'; + +const cardStyles = ` + small { margin-top: 1rem; opacity: 0.5; } + a { + color: ${colors.textColor}; + } + details { + // display: inline; + display: flex; + transition: all 0.2s ease-in-out; + h3 { + display: inline; + } + summary { + padding: 0; + margin: 1rem 0 0 0; + cursor: pointer; + } + summary:before { + content: "►"; + position: absolute; + margin-left: -1rem; + color: ${colors.primary}; + cursor: pointer; + } + &[open] summary:before { + content: "▼"; + } + } +`; + +const getPathName = (link: string) => { + try { + const url = new URL(link); + return url.pathname; + } catch(e) { + return link; + } +}; + +const ContentLinksCard = (props: { data: any, title: string, actionButtons: any }): JSX.Element => { + const { internal, external} = props.data; + console.log('Internal Links', internal); + console.log('External Links', external); + return ( + + Summary + + + { internal && internal.length > 0 && ( +
+ Internal Links + {internal.map((link: string) => ( + + {getPathName(link)} + + ))} +
+ )} + { external && external.length > 0 && ( +
+ External Links + {external.map((link: string) => ( + + {link} + + ))} +
+ )} + {/* {portData.openPorts.map((port: any) => ( + + {port} + + ) + )} +
+ + Unable to establish connections to:
+ {portData.failedPorts.join(', ')} +
*/} +
+ ); +} + +export default ContentLinksCard; diff --git a/src/components/misc/ProgressBar.tsx b/src/components/misc/ProgressBar.tsx index d5bf0c7..1d2be06 100644 --- a/src/components/misc/ProgressBar.tsx +++ b/src/components/misc/ProgressBar.tsx @@ -202,6 +202,7 @@ const jobNames = [ 'sitemap', 'hsts', 'security-txt', + 'linked-pages', // 'whois', 'features', 'carbon', diff --git a/src/pages/Results.tsx b/src/pages/Results.tsx index ed2a1ee..0886a2c 100644 --- a/src/pages/Results.tsx +++ b/src/pages/Results.tsx @@ -1,5 +1,5 @@ import { useState, useEffect, useCallback, ReactNode } from 'react'; -import { useParams } from "react-router-dom"; +import { useParams } from 'react-router-dom'; import styled from 'styled-components'; import { ToastContainer } from 'react-toastify'; import Masonry from 'react-masonry-css' @@ -10,10 +10,15 @@ import Modal from 'components/Form/Modal'; import Footer from 'components/misc/Footer'; import Nav from 'components/Form/Nav'; import { RowProps } from 'components/Form/Row'; -import ErrorBoundary from 'components/misc/ErrorBoundary'; -import docs from 'utils/docs'; import Loader from 'components/misc/Loader'; +import ErrorBoundary from 'components/misc/ErrorBoundary'; +import SelfScanMsg from 'components/misc/SelfScanMsg'; +import DocContent from 'components/misc/DocContent'; +import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar'; +import ActionButtons from 'components/misc/ActionButtons'; +import AdditionalResources from 'components/misc/AdditionalResources'; +import ViewRaw from 'components/misc/ViewRaw'; import ServerLocationCard from 'components/Results/ServerLocation'; import ServerInfoCard from 'components/Results/ServerInfo'; @@ -40,17 +45,11 @@ import DomainLookup from 'components/Results/DomainLookup'; import DnsServerCard from 'components/Results/DnsServer'; import TechStackCard from 'components/Results/TechStack'; import SecurityTxtCard from 'components/Results/SecurityTxt'; -import SelfScanMsg from 'components/misc/SelfScanMsg'; +import ContentLinksCard from 'components/Results/ContentLinks'; - -import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar'; -import ActionButtons from 'components/misc/ActionButtons'; import keys from 'utils/get-keys'; import { determineAddressType, AddressType } from 'utils/address-type-checker'; - import useMotherHook from 'hooks/motherOfAllHooks'; - - import { getLocation, ServerLocation, parseCookies, Cookie, @@ -80,25 +79,6 @@ const ResultsContent = styled.section` padding-bottom: 1rem; `; -const JobDocsContainer = styled.div` -p.doc-desc, p.doc-uses, ul { - margin: 0.25rem auto 1.5rem auto; -} -ul { - padding: 0 0.5rem 0 1rem; -} -ul li a { - color: ${colors.primary}; -} -summary { color: ${colors.primary};} -h4 { - border-top: 1px solid ${colors.primary}; - color: ${colors.primary}; - opacity: 0.75; - padding: 0.5rem 0; -} -`; - const Results = (): JSX.Element => { const startTime = new Date().getTime(); @@ -400,6 +380,14 @@ const Results = (): JSX.Element => { fetchRequest: () => fetch(`${api}/dns-server?url=${address}`).then(res => parseJson(res)), }); + // Get list of links included in the page content + const [linkedPagesResults, updateLinkedPagesResults] = useMotherHook({ + jobId: 'linked-pages', + updateLoadingJobs, + addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly }, + fetchRequest: () => fetch(`${api}/content-links?url=${address}`).then(res => parseJson(res)), + }); + /* Cancel remaining jobs after 10 second timeout */ useEffect(() => { const checkJobs = () => { @@ -438,7 +426,6 @@ const Results = (): JSX.Element => { { id: 'server-info', title: 'Server Info', result: shoadnResults?.serverInfo, Component: ServerInfoCard, refresh: updateShodanResults }, { id: 'redirects', title: 'Redirects', result: redirectResults, Component: RedirectsCard, refresh: updateRedirectResults }, { id: 'robots-txt', title: 'Crawl Rules', result: robotsTxtResults, Component: RobotsTxtCard, refresh: updateRobotsTxtResults }, - { id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults }, { id: 'dnssec', title: 'DNSSEC', result: dnsSecResults, Component: DnsSecCard, refresh: updateDnsSecResults }, { id: 'status', title: 'Server Status', result: serverStatusResults, Component: ServerStatusCard, refresh: updateServerStatusResults }, { id: 'ports', title: 'Open Ports', result: portsResults, Component: OpenPortsCard, refresh: updatePortsResults }, @@ -448,8 +435,11 @@ const Results = (): JSX.Element => { { id: 'hsts', title: 'HSTS Check', result: hstsResults, Component: HstsCard, refresh: updateHstsResults }, { id: 'whois', title: 'Domain Info', result: whoIsResults, Component: WhoIsCard, refresh: updateWhoIsResults }, { id: 'dns-server', title: 'DNS Server', result: dnsServerResults, Component: DnsServerCard, refresh: updateDnsServerResults }, + { id: 'linked-pages', title: 'Linked Pages', result: linkedPagesResults, Component: ContentLinksCard, refresh: updateLinkedPagesResults }, { id: 'features', title: 'Site Features', result: siteFeaturesResults, Component: SiteFeaturesCard, refresh: updateSiteFeaturesResults }, + { id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults }, { id: 'carbon', title: 'Carbon Footprint', result: carbonResults, Component: CarbonFootprintCard, refresh: updateCarbonResults }, + ]; const MakeActionButtons = (title: string, refresh: () => void, showInfo: (id: string) => void): ReactNode => { @@ -463,34 +453,7 @@ const Results = (): JSX.Element => { }; const showInfo = (id: string) => { - const doc = docs.filter((doc: any) => doc.id === id)[0] || null; - setModalContent( - doc? ( - {doc.title} - About -

{doc.description}

- Use Cases -

{doc.use}

- Links - -
- Example - Screenshot -
-
) - : ( - -

No Docs provided for this widget yet

-
- )); + setModalContent(DocContent(id)); setModalOpen(true); }; @@ -534,6 +497,8 @@ const Results = (): JSX.Element => { } + +