1 min read
Web Scraping | Anti-Bot Detection: Scraping ohne Blockaden
Anti-Bot-Detection-Techniken überwinden. Fingerprinting, CAPTCHAs, Rate Limiting und Stealth-Strategien für Web Scraping.
Anti-Bot Detection, Bot Detection, CAPTCHA Bypass, Browser Fingerprinting, Web Scraping, Stealth

Anti-Bot Detection: Scraping ohne Blockaden
Meta-Description: Anti-Bot-Detection-Techniken überwinden. Fingerprinting, CAPTCHAs, Rate Limiting und Stealth-Strategien für Web Scraping.
Keywords: Anti-Bot Detection, Bot Detection, CAPTCHA Bypass, Browser Fingerprinting, Web Scraping, Stealth, Proxy Rotation
Einführung
Moderne Websites setzen ausgefeilte Anti-Bot-Systeme ein: Cloudflare, Akamai, PerimeterX, DataDome. Erfolgreiches Scraping erfordert Wissen über Detection-Mechanismen und ethische Umgehungsstrategien.
Detection-Mechanismen
┌─────────────────────────────────────────────────────────────┐
│                    BOT DETECTION LAYERS                     │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  Layer 1: IP-basiert                                        │
│  ├── Datacenter IP Ranges (bekannt)                         │
│  ├── Request Rate pro IP                                    │
│  ├── Geografische Inkonsistenzen                            │
│  └── IP Reputation Databases                                │
│                                                             │
│  Layer 2: Browser Fingerprinting                            │
│  ├── navigator.webdriver                                    │
│  ├── Chrome Automation Extensions                           │
│  ├── WebGL Fingerprint                                      │
│  ├── Canvas Fingerprint                                     │
│  ├── Audio Context Fingerprint                              │
│  └── Plugin/Font Enumeration                                │
│                                                             │
│  Layer 3: Behavioral Analysis                               │
│  ├── Mouse Movement Patterns                                │
│  ├── Scroll Behavior                                        │
│  ├── Click Timing                                           │
│  ├── Keystroke Dynamics                                     │
│  └── Session Duration                                       │
│                                                             │
│  Layer 4: JavaScript Challenges                             │
│  ├── CAPTCHAs (reCAPTCHA, hCaptcha)                         │
│  ├── JavaScript Execution Tests                             │
│  ├── Cookie/LocalStorage Checks                             │
│  └── TLS Fingerprinting                                     │
│                                                             │
└─────────────────────────────────────────────────────────────┘
Browser Fingerprinting umgehen
import { chromium } from 'playwright';
/**
 * Launches headless Chromium and returns it together with a browser context
 * configured with a German desktop profile (locale `de-DE`, timezone
 * `Europe/Berlin`, Berlin geolocation) and an init script that overrides
 * several `navigator` and WebGL properties.
 *
 * The caller owns the returned browser and is responsible for closing it.
 *
 * @returns The launched `browser` and the prepared `context`.
 * @throws Rethrows any error raised while creating the context or registering
 *   the init script, after closing the already-launched browser.
 */
async function createStealthBrowser() {
  const browser = await chromium.launch({
    headless: true,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--disable-features=IsolateOrigins,site-per-process',
      // NOTE(review): the two sandbox flags below switch off Chromium's process
      // sandbox; confirm this only runs inside an otherwise isolated environment.
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu'
    ]
  });

  try {
    const context = await browser.newContext({
      userAgent: getRealisticUserAgent(),
      viewport: getRealisticViewport(),
      locale: 'de-DE',
      timezoneId: 'Europe/Berlin',
      geolocation: { latitude: 52.52, longitude: 13.405 },
      permissions: ['geolocation'],
      colorScheme: 'light',
      deviceScaleFactor: 1
    });

    // Stealth scripts. The callback is evaluated inside the page, so it must
    // not reference anything from the enclosing Node.js scope.
    await context.addInitScript(() => {
      // WebDriver flag
      Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined
      });

      // Chrome automation marker globals
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Array;
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Promise;
      delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Symbol;

      // Languages
      Object.defineProperty(navigator, 'languages', {
        get: () => ['de-DE', 'de', 'en-US', 'en']
      });

      // Plugins (must not be empty). The array literal already has length 3,
      // so no explicit length assignment is needed.
      Object.defineProperty(navigator, 'plugins', {
        get: () => [
          { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer' },
          { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai' },
          { name: 'Native Client', filename: 'internal-nacl-plugin' }
        ]
      });

      // Platform
      Object.defineProperty(navigator, 'platform', {
        get: () => 'Win32'
      });

      // Hardware concurrency
      Object.defineProperty(navigator, 'hardwareConcurrency', {
        get: () => 8
      });

      // Device memory
      Object.defineProperty(navigator, 'deviceMemory', {
        get: () => 8
      });

      // Permissions query override. The original method is bound to its owner:
      // calling a detached DOM method loses `this` and throws
      // "Illegal invocation" for every non-notification query.
      const permissions = window.navigator.permissions;
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters: PermissionDescriptor) =>
        parameters.name === 'notifications'
          ? Promise.resolve({ state: 'prompt' } as PermissionStatus)
          : originalQuery(parameters);

      // WebGL vendor/renderer (enum values of WEBGL_debug_renderer_info)
      const UNMASKED_VENDOR_WEBGL = 37445;
      const UNMASKED_RENDERER_WEBGL = 37446;
      const getParameter = WebGLRenderingContext.prototype.getParameter;
      WebGLRenderingContext.prototype.getParameter = function (
        this: WebGLRenderingContext,
        parameter: number
      ) {
        if (parameter === UNMASKED_VENDOR_WEBGL) return 'Intel Inc.';
        if (parameter === UNMASKED_RENDERER_WEBGL) return 'Intel Iris OpenGL Engine';
        return getParameter.call(this, parameter);
      };
    });

    return { browser, context };
  } catch (error) {
    // Do not leave an orphaned browser process behind when setup fails; a
    // secondary close failure must not mask the original error.
    await browser.close().catch(() => undefined);
    throw error;
  }
}
/**
 * Picks one entry at random from a fixed list of desktop User-Agent strings
 * (Chrome 120 on Windows, Chrome 120 on macOS, Firefox 121 on Windows).
 *
 * @returns One of the three hard-coded User-Agent strings.
 */
function getRealisticUserAgent(): string {
  const candidates = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0'
  ];
  // Math.random() is in [0, 1), so the floored product is always a valid index.
  const pick = Math.floor(Math.random() * candidates.length);
  return candidates[pick];
}
/**
 * Picks one entry at random from a fixed list of common desktop viewport
 * sizes.
 *
 * @returns A freshly created `{ width, height }` object in CSS pixels.
 */
function getRealisticViewport(): { width: number; height: number } {
  const viewports = [
    { width: 1920, height: 1080 },
    { width: 1366, height: 768 },
    { width: 1536, height: 864 },
    { width: 1440, height: 900 },
    { width: 1280, height: 720 }
  ];
  return viewports[Math.floor(Math.random() * viewports.length)];
}
// Section: Proxy Rotation
/**
 * Connection settings for a single proxy endpoint.
 *
 * NOTE(review): the name shadows nothing at the type level, but it reads like
 * the built-in `Proxy` constructor; consider a more specific name.
 */
interface Proxy {
  /** Proxy address including scheme and port, e.g. `http://dc.proxy.com:8080`. */
  server: string;
  /** Optional user name for proxies that require authentication. */
  username?: string;
  /** Optional password for proxies that require authentication. */
  password?: string;
}
/**
 * Hands out proxies in round-robin order and skips the ones that were
 * reported as failed. Failed proxies are tracked by their `server` string.
 */
class ProxyRotator {
  private readonly proxies: Proxy[];
  private currentIndex = 0;
  private readonly failedProxies = new Set<string>();

  /**
   * @param proxies Proxies to rotate through; may be empty, in which case
   *   {@link getNext} always returns `null`.
   */
  constructor(proxies: Proxy[]) {
    this.proxies = proxies;
  }

  /**
   * Returns the next proxy that is not marked as failed and advances the
   * rotation position past it.
   *
   * @returns The next usable proxy, or `null` when the list is empty or every
   *   proxy has been marked as failed.
   */
  getNext(): Proxy | null {
    const total = this.proxies.length;
    // Inspect each proxy at most once per call. With an empty list the loop
    // body never runs, which avoids both `undefined.server` and `% 0`.
    for (let inspected = 0; inspected < total; inspected++) {
      const proxy = this.proxies[this.currentIndex];
      this.currentIndex = (this.currentIndex + 1) % total;
      if (proxy !== undefined && !this.failedProxies.has(proxy.server)) {
        return proxy;
      }
    }
    return null; // every proxy has failed
  }

  /** Excludes `proxy` from rotation until {@link reset} is called. */
  markFailed(proxy: Proxy) {
    this.failedProxies.add(proxy.server);
  }

  /** Makes all proxies eligible again by clearing the failure list. */
  reset() {
    this.failedProxies.clear();
  }
}
// Usage
/**
 * Opens `url` in a fresh Chromium instance routed through the next proxy
 * supplied by `proxyRotator`.
 *
 * @param url Page to load.
 * @param proxyRotator Source of proxies; the proxy used is marked as failed
 *   when launching, navigating or scraping throws.
 * @throws Error with message `No available proxies` when the rotator has no
 *   usable proxy; otherwise rethrows the underlying error.
 */
async function scrapeWithProxy(url: string, proxyRotator: ProxyRotator) {
  const proxy = proxyRotator.getNext();
  if (!proxy) throw new Error('No available proxies');

  try {
    const browser = await chromium.launch({
      proxy: {
        server: proxy.server,
        username: proxy.username,
        password: proxy.password
      }
    });
    try {
      const page = await browser.newPage();
      await page.goto(url);
      // Scraping logic...
    } finally {
      // Always release the browser process, including when navigation or
      // scraping throws; previously it was only closed on the success path.
      await browser.close();
    }
  } catch (error) {
    proxyRotator.markFailed(proxy);
    throw error;
  }
}
// Residential proxies (recommended for sites with aggressive bot detection)
// NOTE(review): the credentials below are placeholders; real ones should come
// from configuration or a secret store, not from source code.
const residentialProxies: Proxy[] = [
  { server: 'http://residential.proxy.com:8080', username: 'user', password: 'pass' },
  // Residential proxies cost more but are harder to detect
];

// Datacenter proxies (cheaper, but easier to recognise)
const datacenterProxies: Proxy[] = [
  { server: 'http://dc.proxy.com:8080' },
];
// Section: Rate Limiting & Request Timing
/**
 * Sliding-window rate limiter: allows at most `maxRequests` calls to
 * {@link waitForSlot} to complete within any `windowMs` milliseconds.
 */
class RateLimiter {
  private timestamps: number[] = [];
  private readonly maxRequests: number;
  private readonly windowMs: number;

  /**
   * @param maxRequests Maximum number of slots per window; must be at least 1.
   * @param windowMs Window length in milliseconds; must not be negative.
   * @throws RangeError when either argument is out of range or `NaN`.
   */
  constructor(maxRequests: number, windowMs: number) {
    // The negated comparisons also reject NaN.
    if (!(maxRequests >= 1)) {
      throw new RangeError(`maxRequests must be at least 1, got ${maxRequests}`);
    }
    if (!(windowMs >= 0)) {
      throw new RangeError(`windowMs must not be negative, got ${windowMs}`);
    }
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
  }

  /**
   * Resolves as soon as a request slot is free and records the slot as used.
   *
   * The window is re-checked after every wait. Without that, all callers that
   * queued up while the window was full would wake at the same moment and
   * exceed the limit together.
   */
  async waitForSlot(): Promise<void> {
    for (;;) {
      const now = Date.now();
      // Drop timestamps that have left the window.
      this.timestamps = this.timestamps.filter(t => now - t < this.windowMs);

      if (this.timestamps.length < this.maxRequests) {
        this.timestamps.push(now);
        return;
      }

      // The window is full, so at least one timestamp exists.
      const oldestTimestamp = this.timestamps[0] ?? now;
      const waitTime = this.windowMs - (now - oldestTimestamp);
      // Sleep at least 1 ms so an early-firing timer cannot cause a busy loop.
      await new Promise<void>(r => setTimeout(r, Math.max(waitTime, 1)));
    }
  }
}
// Human-like delays
/**
 * Waits for a random amount of time between `min` and `max` milliseconds.
 *
 * @param min Lower bound of the pause in milliseconds.
 * @param max Upper bound of the pause in milliseconds.
 * @returns A promise that resolves once the pause has elapsed.
 */
function humanDelay(min: number = 1000, max: number = 3000): Promise<void> {
  const spread = max - min;
  const pauseMs = min + Math.random() * spread;
  return new Promise((wake) => {
    setTimeout(wake, pauseMs);
  });
}
// Exponential backoff on errors
/**
 * Runs `fn` and retries it with exponential backoff plus random jitter when
 * it rejects.
 *
 * @param fn Operation to run.
 * @param maxRetries Total number of attempts, including the first one; must
 *   be at least 1.
 * @param baseDelay Base delay in milliseconds; the delay before retry `k`
 *   (0-based) is `baseDelay * 2^k` plus jitter.
 * @param maxJitter Upper bound in milliseconds of the random jitter added to
 *   each delay.
 * @returns The value `fn` resolved with on its first successful attempt.
 * @throws RangeError when `maxRetries` is below 1 (previously this threw
 *   `undefined`); otherwise rethrows the error of the last failed attempt
 *   unchanged.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  maxRetries: number = 3,
  baseDelay: number = 1000,
  maxJitter: number = 1000
): Promise<T> {
  // The negated comparison also rejects NaN.
  if (!(maxRetries >= 1)) {
    throw new RangeError(`maxRetries must be at least 1, got ${maxRetries}`);
  }

  // Thrown values are not guaranteed to be Error instances, so keep them
  // as `unknown` and rethrow them untouched.
  let lastError: unknown;
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      // No point in sleeping after the final attempt.
      if (i < maxRetries - 1) {
        const delay = baseDelay * Math.pow(2, i) + Math.random() * maxJitter;
        console.log(`Retry ${i + 1}/${maxRetries} after ${delay}ms`);
        await new Promise(r => setTimeout(r, delay));
      }
    }
  }
  throw lastError;
}
// Section: Human-like Behavior
import { Page } from 'playwright';
/**
 * Runs a short sequence of simulated user interactions on `page`: random
 * mouse movement, scrolling, and a click on one element.
 *
 * @param page Page to interact with.
 * @param clickSelector Selector of the element to click at the end; defaults
 *   to the previously hard-coded `button.submit`.
 */
async function humanLikeBrowsing(page: Page, clickSelector: string = 'button.submit') {
  // Random mouse movement
  await randomMouseMovement(page);
  // Natural scrolling
  await naturalScroll(page);
  // Realistic click behaviour
  await humanClick(page, clickSelector);
}
/**
 * Moves the mouse to three random points inside the page viewport, pausing
 * 100-300 ms after each move.
 *
 * Does nothing when the page reports no viewport size (`viewportSize()`
 * returns `null`), because there are no bounds to pick coordinates from.
 *
 * @param page Page whose mouse is moved.
 */
async function randomMouseMovement(page: Page) {
  const viewport = page.viewportSize();
  // Guard instead of a non-null assertion: a null viewport used to crash
  // with a TypeError on `viewport.width`.
  if (!viewport) return;

  for (let i = 0; i < 3; i++) {
    const x = Math.floor(Math.random() * viewport.width);
    const y = Math.floor(Math.random() * viewport.height);
    await page.mouse.move(x, y, {
      // 5 to 14 intermediate steps per move
      steps: Math.floor(Math.random() * 10) + 5
    });
    await humanDelay(100, 300);
  }
}
/**
 * Scrolls the page down in 2 to 6 wheel steps of 100-399 pixels each, pausing
 * 200-500 ms after every step.
 *
 * @param page Page to scroll.
 */
async function naturalScroll(page: Page) {
  const scrollSteps = Math.floor(Math.random() * 5) + 2;
  for (let i = 0; i < scrollSteps; i++) {
    const scrollAmount = Math.floor(Math.random() * 300) + 100;
    // Playwright's signature is wheel(deltaX, deltaY); the object form
    // `{ deltaY }` belongs to Puppeteer and does not work here.
    await page.mouse.wheel(0, scrollAmount);
    await humanDelay(200, 500);
  }
}
/**
 * Clicks the first element matching `selector` at a random point in the
 * central 40% of its bounding box, moving the mouse there first.
 *
 * Returns without doing anything when the element is not found or has no
 * bounding box.
 *
 * @param page Page that contains the element.
 * @param selector Selector of the element to click.
 */
async function humanClick(page: Page, selector: string) {
  const target = await page.$(selector);
  const bounds = target ? await target.boundingBox() : null;
  if (!bounds) return;

  // Avoid clicking exactly in the centre: pick a spot between 30% and 70%
  // of the width and of the height.
  const horizontalShare = 0.3 + Math.random() * 0.4;
  const verticalShare = 0.3 + Math.random() * 0.4;
  const clickX = bounds.x + bounds.width * horizontalShare;
  const clickY = bounds.y + bounds.height * verticalShare;

  // Move first, pause briefly, then click.
  await page.mouse.move(clickX, clickY, { steps: 10 });
  await humanDelay(50, 150);
  await page.mouse.click(clickX, clickY);
}
// Realistic typing
/**
 * Clicks the element matching `selector` and types `text` into it one
 * character at a time with randomised pauses.
 *
 * @param page Page that contains the input element.
 * @param selector Selector of the element to focus by clicking.
 * @param text Text to type; iterated by Unicode code point.
 */
async function humanType(page: Page, selector: string, text: string) {
  await page.click(selector);
  await humanDelay(100, 200);
  for (const char of text) {
    await page.keyboard.type(char);
    await humanDelay(50, 150); // variable typing speed
    // Occasional longer pause (roughly every tenth character)
    if (Math.random() < 0.1) {
      await humanDelay(200, 500);
    }
  }
}
// Section: Detection Testing
// Test against detection sites
/**
 * Visits a fixed list of bot-detection test pages one after another and saves
 * a full-page screenshot of each as `detection-<hostname>.png`.
 *
 * @param page Page used for navigation and screenshots.
 */
async function testDetection(page: Page) {
  const targets = [
    'https://bot.sannysoft.com',
    'https://arh.antoinevastel.com/bots/areyouheadless',
    'https://infosimples.github.io/detect-headless',
    'https://browserleaks.com/canvas'
  ];

  for (let idx = 0; idx < targets.length; idx += 1) {
    const target = targets[idx];
    await page.goto(target);

    // The screenshot file is named after the host of the visited site.
    const { hostname } = new URL(target);
    await page.screenshot({ path: `detection-${hostname}.png`, fullPage: true });

    console.log(`Tested: ${target}`);
  }
}
// Fingerprint check
/**
 * Reads a set of fingerprint-relevant browser properties inside the page and
 * returns them as a plain object.
 *
 * @param page Page in whose context the properties are read.
 * @returns Whatever `page.evaluate` resolves with: user agent, webdriver
 *   flag, languages, plugin count, platform, hardware concurrency, device
 *   memory, cookie flag, Do-Not-Track setting and resolved time zone.
 */
async function getFingerprint(page: Page) {
  return await page.evaluate(() => {
    return {
      userAgent: navigator.userAgent,
      webdriver: (navigator as any).webdriver,
      languages: navigator.languages,
      plugins: navigator.plugins.length,
      platform: navigator.platform,
      hardwareConcurrency: navigator.hardwareConcurrency,
      // Not part of the standard Navigator typings, hence the cast.
      deviceMemory: (navigator as any).deviceMemory,
      cookieEnabled: navigator.cookieEnabled,
      doNotTrack: navigator.doNotTrack,
      timezone: Intl.DateTimeFormat().resolvedOptions().timeZone
    };
  });
}
// Section: CAPTCHA Handling
// CAPTCHA detection
/**
 * Checks whether the page contains a known CAPTCHA widget.
 *
 * Selectors are tried in order and the first match wins. Widget-specific
 * selectors come before the generic `[data-sitekey]` fallback, because
 * hCaptcha and Turnstile containers also carry a `data-sitekey` attribute and
 * were previously reported as reCAPTCHA.
 *
 * @param page Page to inspect.
 * @returns `'hCaptcha'`, `'Turnstile'`, `'reCAPTCHA'` or `'Generic'` for the
 *   first matching selector, or `null` when none matches.
 */
async function detectCaptcha(page: Page): Promise<string | null> {
  const captchaSelectors = [
    { selector: '.h-captcha', type: 'hCaptcha' },
    // Turnstile's documented container uses the class; the id is kept for
    // pages that relied on the previous selector.
    { selector: '.cf-turnstile, #cf-turnstile', type: 'Turnstile' },
    { selector: '.g-recaptcha, [data-sitekey]', type: 'reCAPTCHA' },
    { selector: '.captcha, #captcha', type: 'Generic' }
  ];
  for (const { selector, type } of captchaSelectors) {
    if (await page.$(selector)) {
      return type;
    }
  }
  return null;
}
// CAPTCHA solving service integration
/**
 * Forwards a reCAPTCHA challenge to an external solving service and writes
 * the returned token into the page. Any other `captchaType` is ignored.
 *
 * Only use this where you are authorised to do so (see the article's notes on
 * ethical and legal scraping).
 *
 * @param page Page showing the challenge.
 * @param captchaType Type as reported by `detectCaptcha`; only `'reCAPTCHA'`
 *   is handled.
 * @throws Error when the `[data-sitekey]` element has no sitekey value or the
 *   `#g-recaptcha-response` field is missing from the page.
 */
async function solveCaptcha(page: Page, captchaType: string) {
  // Integration with services such as 2captcha or Anti-Captcha.
  // Mind ethical use!
  if (captchaType !== 'reCAPTCHA') return;

  const sitekey = await page.$eval(
    '[data-sitekey]',
    el => el.getAttribute('data-sitekey')
  );
  if (!sitekey) {
    throw new Error('reCAPTCHA element has no data-sitekey value');
  }

  // API call to the solving service
  const solution = await callCaptchaSolver({
    type: 'recaptcha',
    sitekey,
    pageUrl: page.url()
  });

  // Insert the solution
  await page.evaluate((token) => {
    const responseField = document.querySelector<HTMLTextAreaElement>('#g-recaptcha-response');
    if (!responseField) {
      throw new Error('#g-recaptcha-response field not found');
    }
    responseField.value = token;
    // NOTE(review): this path goes through undocumented, minified internals
    // of the widget and may change without notice; verify before relying on it.
    (window as any).___grecaptcha_cfg.clients[0].K.K.callback(token);
  }, solution);
}
// Section: Fazit (Conclusion)
Erfolgreiche Anti-Detection erfordert:
- Layered Approach: Fingerprint + Behavior + Proxies
- Realismus: Human-like Patterns
- Rotation: Proxies, User-Agents, Fingerprints
- Respekt: Robots.txt, ToS, Rate Limits
Immer ethisch und legal scrapen!
Bildprompts
- "Shield blocking bot detection attempts, stealth browsing concept"
- "Human hand controlling browser puppet, behavior simulation"
- "Multiple masks representing different browser identities, fingerprint rotation"