Add LWN News section
This commit is contained in:
parent
9b23f46bff
commit
91cc2bd663
12 changed files with 2130 additions and 4 deletions
96
index.mjs
96
index.mjs
|
|
@ -14,6 +14,70 @@ import devTools from './src/com.chrome.devtools.mjs';
|
|||
|
||||
// Awaitable form of `exec`, produced by `promisify`.
// NOTE(review): both names are imported outside this view — presumably node:util and node:child_process; confirm.
const execAsync = promisify(exec);
|
||||
|
||||
// Named character references this module understands, mapped to the plain-ASCII text
// they should become (typographic quotes/dashes are flattened on purpose).
const HTML_NAMED_ENTITIES = new Map([
	['nbsp', ' '],
	['amp', '&'],
	['quot', '"'],
	['apos', "'"],
	['rsquo', "'"],
	['lsquo', "'"],
	['ldquo', '"'],
	['rdquo', '"'],
	['mdash', '-'],
	['ndash', '-'],
	['hellip', '...'],
]);

// Literal typographic characters and their plain-ASCII stand-ins.
const TYPOGRAPHIC_TO_ASCII = new Map([
	['\u00a0', ' '],
	['\u2018', "'"],
	['\u2019', "'"],
	['\u201c', '"'],
	['\u201d', '"'],
	['\u2013', '-'],
	['\u2014', '-'],
	['\u2026', '...'],
]);

/**
 * Decode the HTML character references found in scraped text and flatten
 * typographic punctuation to plain ASCII.
 *
 * - Named references listed in HTML_NAMED_ENTITIES are replaced; unknown names are left as-is.
 * - Decimal (`&#39;`) and hexadecimal (`&#x27;`) references are decoded with
 *   `String.fromCodePoint`, so code points above U+FFFF are preserved.
 *   Out-of-range values are left untouched instead of throwing.
 * - All references are decoded in a single pass, so `&amp;quot;` yields the text
 *   `&quot;` rather than being decoded twice.
 * - Literal curly quotes, dashes, ellipsis and no-break spaces (including ones that
 *   came from numeric references) are then mapped to their ASCII equivalents.
 *
 * @param {string} text - Text that may contain HTML character references.
 * @returns {string} The decoded, ASCII-punctuated text.
 */
const decodeHtml = (text) => text
	.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g, (entity, decimal, hex, name) => {
		if (name !== undefined) return HTML_NAMED_ENTITIES.get(name) ?? entity;

		const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
		// String.fromCodePoint throws a RangeError outside this range, so keep the raw text instead.
		return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
	})
	.replace(/[\u00a0\u2018\u2019\u201c\u201d\u2013\u2014\u2026]/g, (char) => TYPOGRAPHIC_TO_ASCII.get(char));
|
||||
|
||||
/**
 * Reduce an HTML fragment to single-spaced plain text.
 *
 * Script and style elements are dropped together with their contents, comments are
 * removed, and every remaining tag is replaced by a space. Character references are
 * decoded only after the tags are gone, so escaped markup such as `&#60;b&#62;` is
 * kept as text rather than stripped as a tag. Whitespace is normalised last, after
 * decoding, so spaces that came from references like `&nbsp;` are collapsed and
 * trimmed as well.
 *
 * @param {string} text - HTML fragment.
 * @returns {string} Plain text with single spaces and no leading/trailing whitespace.
 */
const stripHtml = (text) => decodeHtml(text
	.replace(/<script[\s\S]*?<\/script>/gi, '')
	.replace(/<style[\s\S]*?<\/style>/gi, '')
	// Comments may contain '>' and would otherwise be only partially removed by the tag pattern.
	.replace(/<!--[\s\S]*?-->/g, '')
	.replace(/<[^>]+>/g, ' '))
	// Replacing tags with spaces leaves gaps before punctuation ("word </b>!"); close them.
	.replace(/\s+([,.;:!?])/g, '$1')
	.replace(/\s+/g, ' ')
	.trim();
|
||||
|
||||
/**
 * Shorten a blurb to roughly `maxLength` characters without cutting a word in half.
 *
 * Text that already fits is returned unchanged. Otherwise the text is cut at
 * `maxLength`; if that cut lands inside a word, it is moved back to the previous
 * space. When the cut already falls on a word boundary the full slice is kept, so a
 * complete final word is not discarded. `...` is appended after the kept text, so the
 * result may be up to three characters longer than `maxLength`.
 *
 * @param {string} text - Blurb text.
 * @param {number} [maxLength=120] - Maximum number of characters kept before the ellipsis.
 * @returns {string} The original text, or a shortened version ending in `...`.
 */
const trimBlurb = (text, maxLength = 120) => {
	if (text.length <= maxLength) return text;

	const shortened = text.slice(0, maxLength);
	// The character just past the cut tells whether the slice ends on a complete word.
	const cutsMidWord = !/\s/.test(text.charAt(maxLength));
	const lastSpace = shortened.lastIndexOf(' ');
	const kept = cutsMidWord && lastSpace > 0 ? shortened.slice(0, lastSpace) : shortened;

	// With no space to fall back to, the cut may split a surrogate pair; drop the dangling half.
	return `${kept.replace(/[\uD800-\uDBFF]$/, '').trimEnd()}...`;
};
|
||||
|
||||
// Base against which relative article links are resolved; also the fallback story URL.
const LWN_BASE_URL = 'https://lwn.net/';

// Turn an href attribute value scraped from the page into an absolute http(s) URL.
// Anything missing, malformed, or using another scheme (e.g. `javascript:`) falls back to the site root.
const resolveLwnUrl = (href) => {
	if (!href) return LWN_BASE_URL;
	try {
		// Attribute values are HTML-escaped, so query separators arrive as `&amp;`.
		const url = new URL(href.replace(/&amp;/g, '&'), LWN_BASE_URL);
		return url.protocol === 'https:' || url.protocol === 'http:' ? url.toString() : LWN_BASE_URL;
	} catch {
		return LWN_BASE_URL;
	}
};

/**
 * Extract front-page stories from the HTML of the LWN home page.
 *
 * Every `<h2>` starts a story; the markup between it and the next `<h2>` (or the end
 * of the document) is that story's section. For each story:
 * - the headline is the heading text with a leading `[$]` marker removed;
 *   empty headlines and the "Welcome to LWN.net" heading are skipped;
 * - the blurb is the first non-empty `<p>` in the section that does not start with
 *   "Posted " or "Read more"; stories without one are skipped;
 * - the URL comes from the first `href="..."` in the heading, else from the first
 *   `/Articles/...` link in the section, else the site root.
 *
 * @param {string} html - Full HTML of the page.
 * @param {number} [maxStories=8] - Maximum number of stories to return.
 * @returns {{headline: string, blurb: string, url: string}[]} Stories in page order.
 */
const parseLwnStories = (html, maxStories = 8) => {
	const headings = [...html.matchAll(/<h2[^>]*>([\s\S]*?)<\/h2>/gi)];
	const stories = [];

	for (const [index, match] of headings.entries()) {
		if (stories.length >= maxStories) break;

		const [headingTag, headingHtml] = match;
		const sectionStart = match.index + headingTag.length;
		const sectionEnd = headings[index + 1]?.index ?? html.length;
		const sectionHtml = html.slice(sectionStart, sectionEnd);

		const headline = stripHtml(headingHtml).replace(/^\[\s*\$\s*\]\s*/, '');
		if (!headline || headline === 'Welcome to LWN.net') continue;

		const blurb = [...sectionHtml.matchAll(/<p[^>]*>([\s\S]*?)<\/p>/gi)]
			.map((paragraph) => stripHtml(paragraph[1]))
			.find((paragraph) => paragraph && !paragraph.startsWith('Posted ') && !paragraph.startsWith('Read more'));
		if (!blurb) continue;

		const hrefMatch = headingHtml.match(/href="([^"]+)"/i)
			?? sectionHtml.match(/href="(\/Articles\/[^"#]+)"/i);

		stories.push({
			headline,
			blurb: trimBlurb(blurb),
			url: resolveLwnUrl(hrefMatch?.[1]),
		});
	}

	return stories;
};
|
||||
|
||||
// Load the generated JSON data files. The three reads do not depend on each other,
// so they are started together instead of being awaited one after another.
const [travelCities, regionalCities, stationInfo] = (await Promise.all([
	readFile('./datagenerators/output/travelcities.json'),
	readFile('./datagenerators/output/regionalcities.json'),
	readFile('./datagenerators/output/stations.json'),
])).map((contents) => JSON.parse(contents));
|
||||
|
|
@ -143,6 +207,38 @@ if (!process.env?.STATIC) {
|
|||
}
|
||||
});
|
||||
|
||||
// How long a successfully parsed story list is reused before lwn.net is requested again.
// Without this, every client poll would trigger a fresh scrape of the upstream site.
const LWN_CACHE_TTL_MS = 10 * 60 * 1000;
// Upper bound for one upstream request, so a stalled connection cannot hang the route.
const LWN_FETCH_TIMEOUT_MS = 10 * 1000;
// Most recent successful result; `fetchedAt` is a Date.now() timestamp.
let lwnStoryCache = { stories: [], fetchedAt: 0 };

/**
 * GET /api/linux-news
 *
 * Responds with `{ success: true, stories }` where `stories` is the output of
 * parseLwnStories for the LWN front page, or with
 * `{ success: false, stories: [], error }` when the page cannot be fetched or yields
 * no stories. Failures are reported in the JSON body; the HTTP status is not changed.
 */
app.get('/api/linux-news', async (req, res) => {
	const cacheAge = Date.now() - lwnStoryCache.fetchedAt;
	if (lwnStoryCache.stories.length > 0 && cacheAge < LWN_CACHE_TTL_MS) {
		res.json({
			success: true,
			stories: lwnStoryCache.stories,
		});
		return;
	}

	try {
		const response = await fetch('https://lwn.net/', {
			headers: {
				'User-Agent': `ws4kp/${version}`,
			},
			// Rejects the fetch with a TimeoutError if the upstream does not answer in time.
			signal: AbortSignal.timeout(LWN_FETCH_TIMEOUT_MS),
		});

		if (!response.ok) {
			throw new Error(`LWN request failed with status ${response.status}`);
		}

		const html = await response.text();
		const stories = parseLwnStories(html);

		if (stories.length === 0) {
			throw new Error('No LWN stories found');
		}

		// Only successful, non-empty results are cached so failures are retried on the next request.
		lwnStoryCache = { stories, fetchedAt: Date.now() };

		res.json({
			success: true,
			stories,
		});
	} catch (error) {
		res.json({
			success: false,
			stories: [],
			error: error.message,
		});
	}
});
|
||||
|
||||
// Mount weatherProxy on the /api/ prefix. It is registered after the /api/linux-news
// route above, so that route is matched first and other /api/ paths reach this middleware.
// NOTE(review): weatherProxy is defined outside this view — confirm what it forwards and caches.
app.use('/api/', weatherProxy);
|
||||
|
||||
// Cache management DELETE endpoint to allow "uncaching" specific URLs
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue