Adds gen har, network requests, meta tags

This commit is contained in:
Alicia Sykes 2023-07-30 02:16:08 +01:00
parent 507fade2f8
commit c1daa4799c
3 changed files with 170 additions and 0 deletions

54
api/generate-har.js Normal file
View file

@ -0,0 +1,54 @@
const puppeteer = require('puppeteer-core');
const chromium = require('chrome-aws-lambda');
exports.handler = async (event, context) => {
let browser = null;
let result = null;
let code = 200;
try {
const url = event.queryStringParameters.url;
browser = await chromium.puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath,
headless: chromium.headless,
});
const page = await browser.newPage();
const requests = [];
// Capture requests
page.on('request', request => {
requests.push({
url: request.url(),
method: request.method(),
headers: request.headers(),
});
});
await page.goto(url, {
waitUntil: 'networkidle0', // wait until all requests are finished
});
result = requests;
} catch (error) {
code = 500;
result = {
error: 'Failed to create HAR file',
details: error.toString(),
};
} finally {
if (browser !== null) {
await browser.close();
}
}
return {
statusCode: code,
body: JSON.stringify(result),
};
};

58
api/meta-tags.js Normal file
View file

@ -0,0 +1,58 @@
const axios = require('axios');
const cheerio = require('cheerio');
exports.handler = async (event) => {
let url;
try {
// Add https:// prefix if not present
url = new URL(event.queryStringParameters.url);
if (!url.protocol) {
url = new URL('https://' + event.queryStringParameters.url);
}
} catch (error) {
// Return error if URL is not valid
return {
statusCode: 400,
body: JSON.stringify({
error: 'Invalid URL provided.',
}),
};
}
try {
// Fetch the page
const response = await axios.get(url.toString());
// Parse the page body with cheerio
const $ = cheerio.load(response.data);
// Extract meta tags
const metaTags = {};
$('head meta').each((index, element) => {
const name = $(element).attr('name');
const property = $(element).attr('property');
const content = $(element).attr('content');
if (name) {
metaTags[name] = content;
} else if (property) {
metaTags[property] = content;
}
});
// Return meta tags
return {
statusCode: 200,
body: JSON.stringify(metaTags),
};
} catch (error) {
// Return error if there's a problem fetching or parsing the page
return {
statusCode: 500,
body: JSON.stringify({
error: error.message,
}),
};
}
};

58
api/network-requests.js Normal file
View file

@ -0,0 +1,58 @@
const puppeteer = require('puppeteer-core');
const chromium = require('chrome-aws-lambda');
exports.handler = async (event, context) => {
const urlParam = event.queryStringParameters.url;
if (!urlParam) {
return {
statusCode: 400,
body: JSON.stringify({ error: 'Missing url parameter' })
};
}
let url;
try {
url = new URL(urlParam.includes('://') ? urlParam : 'https://' + urlParam);
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: 'Invalid URL format' }),
};
}
// Launch the browser and open a new page
const browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: { width: 800, height: 600 },
executablePath: process.env.CHROME_PATH || await chromium.executablePath,
headless: chromium.headless,
ignoreHTTPSErrors: true,
ignoreDefaultArgs: ['--disable-extensions'],
});
const page = await browser.newPage();
// To store network activity
let networkActivity = [];
// Register an event listener for network requests
page.on('request', (request) => {
networkActivity.push({
url: request.url(),
method: request.method(),
headers: request.headers(),
postData: request.postData(),
});
});
// Navigate to the page and wait for it to load
await page.goto(url, { waitUntil: 'networkidle2' });
// Close the browser
await browser.close();
// Return network activity
return {
statusCode: 200,
body: JSON.stringify(networkActivity),
};
};