1 min read
Automatisierung
Playwright Web Scraping: Stealth & Performance Guide
Playwright für Web Scraping 2026. Stealth-Techniken, Anti-Bot Detection, Browser Fingerprinting und Performance-Optimierung.
Playwright, Web Scraping, Stealth, Browser Automation, Anti-Bot, Headless Browser

Playwright Web Scraping: Stealth & Performance Guide
Meta-Description: Playwright für Web Scraping 2026. Stealth-Techniken, Anti-Bot Detection, Browser Fingerprinting und Performance-Optimierung.
Keywords: Playwright, Web Scraping, Stealth, Browser Automation, Anti-Bot, Headless Browser, Data Extraction
Einführung
Playwright ist das moderne Standard-Tool für Web Scraping. Mit WebSocket-basierter Kommunikation, Native Network Interception und Multi-Browser-Support bietet es alles für skalierbare Datenextraktion.
Playwright vs Alternativen
┌─────────────────────────────────────────────────────────────┐
│ SCRAPING TOOLS 2026 │
├─────────────────────────────────────────────────────────────┤
│ │
│ PLAYWRIGHT │
│ ├── WebSocket-First (schneller als HTTP) │
│ ├── Multi-Browser (Chrome, Firefox, WebKit) │
│ ├── Native Request Interception │
│ ├── Auto-Waiting │
│ └── Best for: Modern JS Sites, Stealth │
│ │
│ PUPPETEER │
│ ├── Chrome DevTools Protocol │
│ ├── Chrome/Firefox Support │
│ ├── Größere Community │
│ └── Best for: Chrome-specific Features │
│ │
│ CHEERIO │
│ ├── Kein Browser (nur HTML Parsing) │
│ ├── Extrem schnell │
│ ├── Niedrige Ressourcen │
│ └── Best for: Static Sites │
│ │
└─────────────────────────────────────────────────────────────┘
Basic Setup
npm install playwright
npx playwright install # Browser installieren
// scraper.ts
import { chromium, Browser, Page } from 'playwright';
/**
 * Launches headless Chromium, loads https://example.com and returns the
 * document title together with the text of every <h1>/<h2> element.
 *
 * The browser is closed on every exit path, including failures that happen
 * while the context or the page is still being created.
 *
 * @returns An object with the page `title` and the `headings` texts.
 */
async function scrape() {
  const browser = await chromium.launch({
    headless: true // the 'new' headless mode is the default now
  });
  try {
    // Context and page are created inside the try block so that a failure
    // here still reaches the finally clause and does not leak the browser.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
      viewport: { width: 1920, height: 1080 },
      locale: 'de-DE',
      timezoneId: 'Europe/Berlin'
    });
    const page = await context.newPage();
    await page.goto('https://example.com', {
      waitUntil: 'networkidle',
      timeout: 30000
    });
    // Scraping logic: runs inside the page, so only serialisable data comes back.
    const data = await page.evaluate(() => {
      return {
        title: document.title,
        headings: Array.from(document.querySelectorAll('h1, h2'))
          .map(h => h.textContent)
      };
    });
    return data;
  } finally {
    await browser.close();
  }
}
Stealth Mode
// playwright-extra für Stealth Plugins
import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin with the playwright-extra chromium launcher imported above.
chromium.use(stealth());
/**
 * Opens `url` in a headless Chromium context configured to look like a
 * regular German desktop browser and returns the live handles.
 *
 * The caller owns the returned `browser` and must close it. If anything fails
 * before the handles can be returned, the browser is closed here, because the
 * caller would otherwise have no way to reach it.
 *
 * @param url Address to navigate to.
 * @returns The navigated `page` plus its `browser` and `context`.
 * @throws Re-throws any launch, setup or navigation error after cleanup.
 */
async function stealthScrape(url: string) {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      // Realistic browser settings
      userAgent: getRandomUserAgent(),
      viewport: getRandomViewport(),
      locale: 'de-DE',
      timezoneId: 'Europe/Berlin',
      geolocation: { latitude: 52.52, longitude: 13.405 },
      permissions: ['geolocation'],
      // Device metrics of a non-touch desktop screen
      deviceScaleFactor: 1,
      hasTouch: false,
      isMobile: false
    });
    // The init script runs in every page of the context before the page's own scripts.
    await context.addInitScript(() => {
      // Hide the webdriver flag
      Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined
      });
      // Chrome-specific properties
      Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5]
      });
      Object.defineProperty(navigator, 'languages', {
        get: () => ['de-DE', 'de', 'en-US', 'en']
      });
      // Remove the cdc_* globals that automation detectors look for
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Array;
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Promise;
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
    });
    const page = await context.newPage();
    await page.goto(url);
    return { page, browser, context };
  } catch (error) {
    // Nothing was handed to the caller yet, so clean up before propagating.
    await browser.close();
    throw error;
  }
}
// Random user agents
/**
 * Picks one user-agent string uniformly at random from a fixed pool of three
 * desktop browsers (Chrome on Windows, Chrome on macOS, Firefox on Windows).
 *
 * NOTE(review): the pool contains a Firefox string while the visible callers
 * launch Chromium; confirm that this mix is intended.
 *
 * @returns One of the three pooled user-agent strings.
 */
function getRandomUserAgent(): string {
  const pool = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0'
  ];
  const pick = Math.floor(Math.random() * pool.length);
  return pool[pick];
}
/**
 * Picks one viewport size uniformly at random from four desktop resolutions.
 *
 * @returns An object with `width` and `height` in pixels.
 */
function getRandomViewport() {
  const sizes = [
    { width: 1920, height: 1080 },
    { width: 1366, height: 768 },
    { width: 1536, height: 864 },
    { width: 1440, height: 900 }
  ];
  const pick = Math.floor(Math.random() * sizes.length);
  return sizes[pick];
}
Network Interception
// Block resources for speed
/**
 * Opens `url` in a fresh Chromium page while aborting requests that are not
 * needed for data extraction (images, stylesheets, fonts, media and requests
 * whose URL looks like analytics/tracking/ads).
 *
 * The caller owns the returned page and is responsible for closing its
 * browser (reachable through `page.context().browser()`). If setup or
 * navigation fails, the browser is closed here before the error propagates.
 *
 * @param url Address to navigate to.
 * @returns The page after `domcontentloaded`.
 * @throws Re-throws any launch, routing or navigation error after cleanup.
 */
async function fastScrape(url: string) {
  const blockedTypes = new Set(['image', 'stylesheet', 'font', 'media']);
  // Match the keywords only as whole URL tokens. A plain substring test for
  // "ads" would also hit "downloads", "uploads" or "threads".
  const trackerPattern = /(^|[^a-z0-9])(analytics|tracking|ads)([^a-z0-9]|$)/i;

  const browser = await chromium.launch();
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.route('**/*', (route) => {
      const request = route.request();
      // Drop heavy resources that are irrelevant for scraping
      if (blockedTypes.has(request.resourceType())) {
        return route.abort();
      }
      // Block tracking scripts, but never a navigation: aborting it would
      // make page.goto() fail for target URLs that contain a keyword.
      if (!request.isNavigationRequest() && trackerPattern.test(request.url())) {
        return route.abort();
      }
      return route.continue();
    });
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    return page;
  } catch (error) {
    // The caller never received the page, so the browser must be closed here.
    await browser.close();
    throw error;
  }
}
// Capture API responses
/**
 * Registers a `response` listener on `page` and returns the array that the
 * listener fills. The array is returned immediately and grows as successful
 * responses whose URL contains `/api/` arrive; bodies that cannot be read as
 * JSON are skipped.
 *
 * @param page Page whose responses are observed.
 * @returns A live array of `{ url, data }` entries.
 */
async function interceptApi(page: Page) {
  const apiResponses: any[] = [];
  page.on('response', async (res) => {
    const responseUrl = res.url();
    const isApiHit = responseUrl.includes('/api/') && res.ok();
    if (!isApiHit) {
      return;
    }
    let payload;
    try {
      payload = await res.json();
    } catch {
      // Not a JSON body: ignore this response.
      return;
    }
    apiResponses.push({ url: responseUrl, data: payload });
  });
  return apiResponses;
}
// Modify requests
// Every request matching **/api/** continues with its original headers plus
// the two headers set below.
await page.route('**/api/**', (route) => {
  const originalHeaders = route.request().headers();
  const headers = Object.assign({}, originalHeaders, {
    'Authorization': 'Bearer token123',
    'X-Custom-Header': 'value'
  });
  route.continue({ headers });
});
Selektoren & Datenextraktion
// Modern selectors
/**
 * Extracts the main heading, price texts, links, table rows and the first
 * JSON-LD block from `page`.
 *
 * @param page An already navigated page.
 * @returns `title` (text of the first <h1>, or null when there is none),
 *   `prices` (text of every `.price` element), `links` (href and trimmed
 *   text of every <a>), `tableData` (trimmed cell texts per table body row)
 *   and `jsonLd` (parsed JSON-LD, or null when absent or malformed).
 */
async function extractData(page: Page) {
  // CSS selector. Locators are strict: textContent() on a locator matching
  // several <h1> elements throws, and one matching none waits for the
  // timeout. Read the first match only and fall back to null.
  const heading = page.locator('h1').first();
  const title = (await heading.count()) > 0 ? await heading.textContent() : null;
  // Role-, label- and text-based selectors (shown for illustration only;
  // they are lazy locators and are not part of the result)
  const loginButton = page.getByRole('button', { name: 'Login' });
  const emailInput = page.getByLabel('E-Mail');
  const link = page.getByText('Mehr erfahren');
  // Multiple elements
  const prices = await page.locator('.price').allTextContents();
  // Extract attributes
  const links = await page.locator('a').evaluateAll(
    (elements) => elements.map(el => ({
      href: el.getAttribute('href'),
      text: el.textContent?.trim()
    }))
  );
  // Scrape tables
  const tableData = await page.evaluate(() => {
    const rows = document.querySelectorAll('table tbody tr');
    return Array.from(rows).map(row => {
      const cells = row.querySelectorAll('td');
      return Array.from(cells).map(cell => cell.textContent?.trim());
    });
  });
  // Structured data (JSON-LD)
  const jsonLd = await page.evaluate(() => {
    const script = document.querySelector('script[type="application/ld+json"]');
    if (!script) {
      return null;
    }
    try {
      return JSON.parse(script.textContent || '{}');
    } catch {
      // Malformed JSON-LD must not abort the rest of the extraction.
      return null;
    }
  });
  return { title, prices, links, tableData, jsonLd };
}
// Wait for dynamic content
/**
 * Runs four waits on `page`, one after another: the product list is visible,
 * the network is idle, the first product card is present, and at least ten
 * product cards exist in the DOM.
 *
 * @param page Page that is loading its content dynamically.
 */
async function waitForContent(page: Page) {
  // 1. Wait for the list container
  await page.waitForSelector('.product-list', { state: 'visible' });

  // 2. Wait for network idle
  await page.waitForLoadState('networkidle');

  // 3. Wait for the first product card
  const firstCard = page.locator('.product-card').first();
  await firstCard.waitFor();

  // 4. Custom condition, evaluated inside the page
  await page.waitForFunction(() => {
    const cards = document.querySelectorAll('.product-card');
    return cards.length >= 10;
  });
}
Pagination & Infinite Scroll
// Pagination
/**
 * Scrapes `.item` elements from consecutive result pages, addressed through a
 * `page` query parameter, until a page yields no items or `maxPages` is
 * reached.
 *
 * @param baseUrl Absolute URL of the listing; an existing query string is kept.
 * @param maxPages Upper bound on the number of pages to visit (default 10).
 * @returns All collected `{ title, price }` entries in page order.
 * @throws TypeError when `baseUrl` is not a valid absolute URL; navigation
 *   errors propagate after the browser has been closed.
 */
async function scrapePaginated(baseUrl: string, maxPages: number = 10) {
  // Parse first: an invalid URL fails before a browser is started, and
  // searchParams keeps any query string that baseUrl already carries.
  const pageUrl = new URL(baseUrl);
  const browser = await chromium.launch();
  try {
    const page = await browser.newPage();
    const allData: any[] = [];
    for (let i = 1; i <= maxPages; i++) {
      pageUrl.searchParams.set('page', String(i));
      await page.goto(pageUrl.toString());
      const pageData = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('.item')).map(el => ({
          title: el.querySelector('.title')?.textContent,
          price: el.querySelector('.price')?.textContent
        }));
      });
      if (pageData.length === 0) break; // no more data
      allData.push(...pageData);
      // Rate limiting: only needed when another page will be requested
      if (i < maxPages) {
        await page.waitForTimeout(1000 + Math.random() * 2000);
      }
    }
    return allData;
  } finally {
    // Runs on success and on error, so the browser never leaks.
    await browser.close();
  }
}
// Infinite scroll
/**
 * Scrolls `url` to the bottom repeatedly until the document height stops
 * growing or `maxScrolls` is reached, then extracts every `.item` element.
 *
 * @param url Address of the page with infinite scrolling.
 * @param maxScrolls Upper bound on scroll iterations (default 20).
 * @returns One `{ title, price }` entry per `.item` element.
 */
async function scrapeInfiniteScroll(url: string, maxScrolls: number = 20) {
  const browser = await chromium.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    let previousHeight = 0;
    let scrollCount = 0;
    while (scrollCount < maxScrolls) {
      // Scroll to bottom
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      // Give newly requested content time to render
      await page.waitForTimeout(2000);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        break; // no new content
      }
      previousHeight = currentHeight;
      scrollCount++;
    }
    // Extract all data. map() needs a real callback; the fields mirror the
    // ones collected by scrapePaginated.
    const data = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('.item')).map(el => ({
        title: el.querySelector('.title')?.textContent,
        price: el.querySelector('.price')?.textContent
      }));
    });
    return data;
  } finally {
    // Runs on success and on error, so the browser never leaks.
    await browser.close();
  }
}
Proxy & Session Management
// Proxy setup: all traffic of this browser goes through one authenticated proxy.
const proxySettings = {
  server: 'http://proxy.example.com:8080',
  username: 'user',
  password: 'pass'
};
const browser = await chromium.launch({ proxy: proxySettings });
// Rotating proxies
/**
 * Visits each URL through a proxy picked at random from a fixed pool. Every
 * URL gets its own browser instance, which is closed again afterwards even
 * when navigation fails.
 *
 * @param urls Addresses to visit, processed sequentially.
 */
async function withRotatingProxy(urls: string[]) {
  const proxies = [
    'http://proxy1.example.com:8080',
    'http://proxy2.example.com:8080',
    'http://proxy3.example.com:8080'
  ];
  for (const url of urls) {
    const pick = Math.floor(Math.random() * proxies.length);
    const server = proxies[pick];
    const browser = await chromium.launch({ proxy: { server } });
    try {
      const page = await browser.newPage();
      await page.goto(url);
      // Scrape...
    } finally {
      await browser.close();
    }
  }
}
// Session/cookie persistence
/**
 * Logs in with a temporary context, stores its cookies in `cookies.json`, and
 * restores them into a new context on the shared `browser`.
 *
 * @returns The new context carrying the saved cookies; the caller owns it and
 *   should close it when done.
 */
async function persistSession() {
  // Loaded here so the function does not depend on a file-level `fs` import.
  const { readFile, writeFile } = await import('node:fs/promises');

  // Save the session
  const context = await browser.newContext();
  try {
    const page = await context.newPage();
    await page.goto('https://example.com/login');
    // Perform login...
    // Save cookies
    const cookies = await context.cookies();
    await writeFile('cookies.json', JSON.stringify(cookies));
  } finally {
    // The login context is only needed to obtain the cookies.
    await context.close();
  }

  // Restore the session
  const savedCookies = JSON.parse(await readFile('cookies.json', 'utf8'));
  const newContext = await browser.newContext();
  try {
    await newContext.addCookies(savedCookies);
  } catch (error) {
    // Do not leave a half-initialised context behind.
    await newContext.close();
    throw error;
  }
  return newContext;
}
Parallel Scraping
import { chromium, Browser } from 'playwright';
import pLimit from 'p-limit';
/**
 * Scrapes many URLs with one shared browser, running at most `concurrency`
 * pages at the same time. Each URL gets its own isolated context.
 *
 * A failure for one URL never rejects the whole batch: it is reported as a
 * `{ success: false }` entry. The browser is closed on every exit path.
 *
 * @param urls Addresses to scrape.
 * @param concurrency Maximum number of simultaneously open contexts (default 5).
 * @returns One entry per URL, in input order: `{ url, data, success: true }`
 *   or `{ url, error, success: false }`.
 */
async function parallelScrape(urls: string[], concurrency: number = 5) {
  // Create the limiter first so that, if it rejects the concurrency value,
  // no browser has been started yet.
  const limit = pLimit(concurrency);
  const browser = await chromium.launch();
  try {
    const results = await Promise.all(
      urls.map(url =>
        limit(async () => {
          let context;
          try {
            // Created inside the try block so that a failing newContext() is
            // reported per URL instead of rejecting the whole batch.
            context = await browser.newContext();
            const page = await context.newPage();
            await page.goto(url, { timeout: 30000 });
            const data = await extractData(page);
            return { url, data, success: true };
          } catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return { url, error: message, success: false };
          } finally {
            await context?.close();
          }
        })
      )
    );
    return results;
  } finally {
    await browser.close();
  }
}
Fazit
Playwright Web Scraping 2026:
- Stealth First: Anti-Detection von Anfang an
- Performance: Resource Blocking, Parallel Scraping
- Robustheit: Auto-Waiting, Retries, Error Handling
- Compliance: Robots.txt respektieren, Rate Limiting
Immer rechtliche Aspekte und Terms of Service beachten.
Bildprompts
- "Spider crawling through web pages extracting data, web scraping concept"
- "Browser automation with invisible robot, stealth scraping"
- "Multiple parallel processes scraping different websites, concurrent extraction"