diff --git a/api/content-links.js b/api/content-links.js new file mode 100644 index 0000000..34e4205 --- /dev/null +++ b/api/content-links.js @@ -0,0 +1,48 @@ +const axios = require('axios'); +const cheerio = require('cheerio'); +const urlLib = require('url'); + +exports.handler = async (event, context) => { + let url = event.queryStringParameters.url; + + // Check if url includes protocol + if (!url.startsWith('http://') && !url.startsWith('https://')) { + url = 'http://' + url; + } + + try { + const response = await axios.get(url); + const html = response.data; + const $ = cheerio.load(html); + const internalLinksMap = new Map(); + const externalLinksMap = new Map(); + + $('a[href]').each((i, link) => { + const href = $(link).attr('href'); + const absoluteUrl = urlLib.resolve(url, href); + + if (absoluteUrl.startsWith(url)) { + const count = internalLinksMap.get(absoluteUrl) || 0; + internalLinksMap.set(absoluteUrl, count + 1); + } else if (href.startsWith('http://') || href.startsWith('https://')) { + const count = externalLinksMap.get(absoluteUrl) || 0; + externalLinksMap.set(absoluteUrl, count + 1); + } + }); + + // Convert maps to sorted arrays + const internalLinks = [...internalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]); + const externalLinks = [...externalLinksMap.entries()].sort((a, b) => b[1] - a[1]).map(entry => entry[0]); + + return { + statusCode: 200, + body: JSON.stringify({ internal: internalLinks, external: externalLinks }), + }; + } catch (error) { + console.log(error); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Failed fetching data' }), + }; + } +}; diff --git a/package.json b/package.json index 1471758..b25c5cb 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "@types/react-simple-maps": "^3.0.0", "@types/styled-components": "^5.1.26", "axios": "^1.4.0", + "cheerio": "^1.0.0-rc.12", "chrome-aws-lambda": "^10.1.0", "flatted": "^3.2.7", "follow-redirects": "^1.15.2", diff --git a/src/components/Results/ContentLinks.tsx b/src/components/Results/ContentLinks.tsx new file mode 100644 index 0000000..d2ea0a5 --- /dev/null +++ b/src/components/Results/ContentLinks.tsx @@ -0,0 +1,89 @@ +import { Card } from 'components/Form/Card'; +import Row from 'components/Form/Row'; +import Heading from 'components/Form/Heading'; +import colors from 'styles/colors'; + +const cardStyles = ` + small { margin-top: 1rem; opacity: 0.5; } + a { + color: ${colors.textColor}; + } + details { + // display: inline; + display: flex; + transition: all 0.2s ease-in-out; + h3 { + display: inline; + } + summary { + padding: 0; + margin: 1rem 0 0 0; + cursor: pointer; + } + summary:before { + content: "►"; + position: absolute; + margin-left: -1rem; + color: ${colors.primary}; + cursor: pointer; + } + &[open] summary:before { + content: "▼"; + } + } +`; + +const getPathName = (link: string) => { + try { + const url = new URL(link); + return url.pathname; + } catch(e) { + return link; + } +}; + +const ContentLinksCard = (props: { data: any, title: string, actionButtons: any }): JSX.Element => { + const { internal, external} = props.data; + console.log('Internal Links', internal); + console.log('External Links', external); + return ( + + Summary + + + { internal && internal.length > 0 && ( + + Internal Links + {internal.map((link: string) => ( + + {getPathName(link)} + + ))} + + )} + { external && external.length > 0 && ( + + External Links + {external.map((link: string) => ( + + {link} + + ))} + + )} + {/* {portData.openPorts.map((port: any) => ( + + {port} + + ) + )} + + + Unable to establish connections to: + {portData.failedPorts.join(', ')} + */} + + ); +} + +export default ContentLinksCard; diff --git a/src/components/misc/ProgressBar.tsx b/src/components/misc/ProgressBar.tsx index d5bf0c7..1d2be06 100644 --- a/src/components/misc/ProgressBar.tsx +++ b/src/components/misc/ProgressBar.tsx @@ -202,6 +202,7 @@ const jobNames = [ 'sitemap', 'hsts', 'security-txt', + 'linked-pages', // 'whois', 'features', 'carbon', diff --git a/src/pages/Results.tsx b/src/pages/Results.tsx index ed2a1ee..0886a2c 100644 --- a/src/pages/Results.tsx +++ b/src/pages/Results.tsx @@ -1,5 +1,5 @@ import { useState, useEffect, useCallback, ReactNode } from 'react'; -import { useParams } from "react-router-dom"; +import { useParams } from 'react-router-dom'; import styled from 'styled-components'; import { ToastContainer } from 'react-toastify'; import Masonry from 'react-masonry-css' @@ -10,10 +10,15 @@ import Modal from 'components/Form/Modal'; import Footer from 'components/misc/Footer'; import Nav from 'components/Form/Nav'; import { RowProps } from 'components/Form/Row'; -import ErrorBoundary from 'components/misc/ErrorBoundary'; -import docs from 'utils/docs'; import Loader from 'components/misc/Loader'; +import ErrorBoundary from 'components/misc/ErrorBoundary'; +import SelfScanMsg from 'components/misc/SelfScanMsg'; +import DocContent from 'components/misc/DocContent'; +import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar'; +import ActionButtons from 'components/misc/ActionButtons'; +import AdditionalResources from 'components/misc/AdditionalResources'; +import ViewRaw from 'components/misc/ViewRaw'; import ServerLocationCard from 'components/Results/ServerLocation'; import ServerInfoCard from 'components/Results/ServerInfo'; @@ -40,17 +45,11 @@ import DomainLookup from 'components/Results/DomainLookup'; import DnsServerCard from 'components/Results/DnsServer'; import TechStackCard from 'components/Results/TechStack'; import SecurityTxtCard from 'components/Results/SecurityTxt'; -import SelfScanMsg from 'components/misc/SelfScanMsg'; +import ContentLinksCard from 'components/Results/ContentLinks'; - -import ProgressBar, { LoadingJob, LoadingState, initialJobs } from 'components/misc/ProgressBar'; -import ActionButtons from 'components/misc/ActionButtons'; import keys from 'utils/get-keys'; import { determineAddressType, AddressType } from 'utils/address-type-checker'; - import useMotherHook from 'hooks/motherOfAllHooks'; - - import { getLocation, ServerLocation, parseCookies, Cookie, @@ -80,25 +79,6 @@ const ResultsContent = styled.section` padding-bottom: 1rem; `; -const JobDocsContainer = styled.div` -p.doc-desc, p.doc-uses, ul { - margin: 0.25rem auto 1.5rem auto; -} -ul { - padding: 0 0.5rem 0 1rem; -} -ul li a { - color: ${colors.primary}; -} -summary { color: ${colors.primary};} -h4 { - border-top: 1px solid ${colors.primary}; - color: ${colors.primary}; - opacity: 0.75; - padding: 0.5rem 0; -} -`; - const Results = (): JSX.Element => { const startTime = new Date().getTime(); @@ -400,6 +380,14 @@ const Results = (): JSX.Element => { fetchRequest: () => fetch(`${api}/dns-server?url=${address}`).then(res => parseJson(res)), }); + // Get list of links included in the page content + const [linkedPagesResults, updateLinkedPagesResults] = useMotherHook({ + jobId: 'linked-pages', + updateLoadingJobs, + addressInfo: { address, addressType, expectedAddressTypes: urlTypeOnly }, + fetchRequest: () => fetch(`${api}/content-links?url=${address}`).then(res => parseJson(res)), + }); + /* Cancel remaining jobs after 10 second timeout */ useEffect(() => { const checkJobs = () => { @@ -438,7 +426,6 @@ const Results = (): JSX.Element => { { id: 'server-info', title: 'Server Info', result: shoadnResults?.serverInfo, Component: ServerInfoCard, refresh: updateShodanResults }, { id: 'redirects', title: 'Redirects', result: redirectResults, Component: RedirectsCard, refresh: updateRedirectResults }, { id: 'robots-txt', title: 'Crawl Rules', result: robotsTxtResults, Component: RobotsTxtCard, refresh: updateRobotsTxtResults }, - { id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults }, { id: 'dnssec', title: 'DNSSEC', result: dnsSecResults, Component: DnsSecCard, refresh: updateDnsSecResults }, { id: 'status', title: 'Server Status', result: serverStatusResults, Component: ServerStatusCard, refresh: updateServerStatusResults }, { id: 'ports', title: 'Open Ports', result: portsResults, Component: OpenPortsCard, refresh: updatePortsResults }, @@ -448,8 +435,11 @@ const Results = (): JSX.Element => { { id: 'hsts', title: 'HSTS Check', result: hstsResults, Component: HstsCard, refresh: updateHstsResults }, { id: 'whois', title: 'Domain Info', result: whoIsResults, Component: WhoIsCard, refresh: updateWhoIsResults }, { id: 'dns-server', title: 'DNS Server', result: dnsServerResults, Component: DnsServerCard, refresh: updateDnsServerResults }, + { id: 'linked-pages', title: 'Linked Pages', result: linkedPagesResults, Component: ContentLinksCard, refresh: updateLinkedPagesResults }, { id: 'features', title: 'Site Features', result: siteFeaturesResults, Component: SiteFeaturesCard, refresh: updateSiteFeaturesResults }, + { id: 'sitemap', title: 'Pages', result: sitemapResults, Component: SitemapCard, refresh: updateSitemapResults }, { id: 'carbon', title: 'Carbon Footprint', result: carbonResults, Component: CarbonFootprintCard, refresh: updateCarbonResults }, + ]; const MakeActionButtons = (title: string, refresh: () => void, showInfo: (id: string) => void): ReactNode => { @@ -463,34 +453,7 @@ const Results = (): JSX.Element => { }; const showInfo = (id: string) => { - const doc = docs.filter((doc: any) => doc.id === id)[0] || null; - setModalContent( - doc? ( - {doc.title} - About - {doc.description} - Use Cases - {doc.use} - Links - - {doc.resources.map((resource: string | { title: string, link: string } , index: number) => ( - typeof resource === 'string' ? ( - {resource} - ) : ( - {resource.title} - ) - ))} - - - Example - - - ) - : ( - - No Docs provided for this widget yet - - )); + setModalContent(DocContent(id)); setModalOpen(true); }; @@ -534,6 +497,8 @@ const Results = (): JSX.Element => { } + + setModalOpen(false)}>{modalContent} diff --git a/yarn.lock b/yarn.lock index b035d7b..0a23e79 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5256,6 +5256,31 @@ check-types@^11.1.1: resolved "https://registry.yarnpkg.com/check-types/-/check-types-11.2.2.tgz#7afc0b6a860d686885062f2dba888ba5710335b4" integrity sha512-HBiYvXvn9Z70Z88XKjz3AEKd4HJhBXsa3j7xFnITAzoS8+q6eIGi8qDB8FKPBAjtuxjI/zFpwuiCb8oDtKOYrA== +cheerio-select@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4" + integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g== + dependencies: + boolbase "^1.0.0" + css-select "^5.1.0" + css-what "^6.1.0" + domelementtype "^2.3.0" + domhandler "^5.0.3" + domutils "^3.0.1" + +cheerio@^1.0.0-rc.12: + version "1.0.0-rc.12" + resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" + integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== + dependencies: + cheerio-select "^2.1.0" + dom-serializer "^2.0.0" + domhandler "^5.0.3" + domutils "^3.0.1" + htmlparser2 "^8.0.1" + parse5 "^7.0.0" + parse5-htmlparser2-tree-adapter "^7.0.0" + chokidar@3.5.3, chokidar@^3.4.0, chokidar@^3.4.2, chokidar@^3.5.3: version "3.5.3" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd" @@ -5976,6 +6001,17 @@ css-select@^4.1.3: domutils "^2.8.0" nth-check "^2.0.1" +css-select@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6" + integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg== + dependencies: + boolbase "^1.0.0" + css-what "^6.1.0" + domhandler "^5.0.2" + domutils "^3.0.1" + nth-check "^2.0.1" + css-to-react-native@^3.2.0: version "3.2.0" resolved "https://registry.yarnpkg.com/css-to-react-native/-/css-to-react-native-3.2.0.tgz#cdd8099f71024e149e4f6fe17a7d46ecd55f1e32" @@ -6006,7 +6042,7 @@ css-what@^3.2.1: resolved "https://registry.yarnpkg.com/css-what/-/css-what-3.4.2.tgz#ea7026fcb01777edbde52124e21f327e7ae950e4" integrity sha512-ACUm3L0/jiZTqfzRM3Hi9Q8eZqd6IK37mMWPLz9PJxkLWllYeRf+EHUSHYEtFop2Eqytaq1FizFVh7XfBnXCDQ== -css-what@^6.0.1: +css-what@^6.0.1, css-what@^6.1.0: version "6.1.0" resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== @@ -6593,12 +6629,21 @@ dom-serializer@^1.0.1: domhandler "^4.2.0" entities "^2.0.0" +dom-serializer@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53" + integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.2" + entities "^4.2.0" + domelementtype@1: version "1.3.1" resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f" integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w== -domelementtype@^2.0.1, domelementtype@^2.2.0: +domelementtype@^2.0.1, domelementtype@^2.2.0, domelementtype@^2.3.0: version "2.3.0" resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d" integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== @@ -6617,6 +6662,13 @@ domhandler@^4.0.0, domhandler@^4.2.0, domhandler@^4.3.1: dependencies: domelementtype "^2.2.0" +domhandler@^5.0.2, domhandler@^5.0.3: + version "5.0.3" + resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31" + integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w== + dependencies: + domelementtype "^2.3.0" + domutils@^1.7.0: version "1.7.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a" @@ -6634,6 +6686,15 @@ domutils@^2.5.2, domutils@^2.8.0: domelementtype "^2.2.0" domhandler "^4.2.0" +domutils@^3.0.1: + version "3.1.0" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e" + integrity sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA== + dependencies: + dom-serializer "^2.0.0" + domelementtype "^2.3.0" + domhandler "^5.0.3" + dot-case@^3.0.4: version "3.0.4" resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751" @@ -6770,6 +6831,11 @@ entities@^2.0.0, entities@^2.2.0: resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== +entities@^4.2.0, entities@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + env-paths@3.0.0, env-paths@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-3.0.0.tgz#2f1e89c2f6dbd3408e1b1711dd82d62e317f58da" @@ -8542,6 +8608,16 @@ htmlparser2@^6.1.0: domutils "^2.5.2" entities "^2.0.0" +htmlparser2@^8.0.1: + version "8.0.2" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21" + integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.3" + domutils "^3.0.1" + entities "^4.4.0" + http-cache-semantics@^4.1.1: version "4.1.1" resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz#abe02fcb2985460bf0323be664436ec3476a6d5a" @@ -11800,11 +11876,26 @@ parse-ms@^3.0.0: resolved "https://registry.yarnpkg.com/parse-ms/-/parse-ms-3.0.0.tgz#3ea24a934913345fcc3656deda72df921da3a70e" integrity sha512-Tpb8Z7r7XbbtBTrM9UhpkzzaMrqA2VXMT3YChzYltwV3P3pM6t8wl7TvpMnSTosz1aQAdVib7kdoys7vYOPerw== +parse5-htmlparser2-tree-adapter@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1" + integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g== + dependencies: + domhandler "^5.0.2" + parse5 "^7.0.0" + parse5@6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== +parse5@^7.0.0: + version "7.1.2" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32" + integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw== + dependencies: + entities "^4.4.0" + parseurl@~1.3.2, parseurl@~1.3.3: version "1.3.3" resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
{doc.description}
{doc.use}
No Docs provided for this widget yet