From d8a3806890b3628e6d8cf81dd3b2f8af7955ae45 Mon Sep 17 00:00:00 2001 From: buffge Date: Thu, 28 Aug 2025 22:10:34 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81=20?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0esbuild=20biome?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 8 +- .vscode/settings.json | 20 +- .vscode/tasks.json | 30 ++ 1.sh | 17 + biome.jsonc | 58 +++ esbuild.js | 21 + package.json | 31 +- runtime/screenshot/.gitignore | 2 - src/Spider.ts | 89 ++-- src/app.ts | 185 ++++++++ src/config/app.ts | 90 ++-- src/index.ts | 9 +- src/interface/Spider.ts | 31 -- src/sites/sina/index.ts | 148 ------- src/type.d.ts | 7 + tsconfig.json | 86 ++-- yarn.lock | 766 ++++++++++++++++++++++++++++++++++ 17 files changed, 1258 insertions(+), 340 deletions(-) create mode 100644 .vscode/tasks.json create mode 100755 1.sh create mode 100644 biome.jsonc create mode 100644 esbuild.js delete mode 100644 runtime/screenshot/.gitignore create mode 100644 src/app.ts delete mode 100644 src/interface/Spider.ts delete mode 100644 src/sites/sina/index.ts create mode 100644 src/type.d.ts create mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore index a7d127b..c45cefb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,3 @@ node_modules/ -build/ -yarn.lock -package-lock.json -cookie.ts -cookies.ts - +dist/ +logs diff --git a/.vscode/settings.json b/.vscode/settings.json index 1c41efc..b3875b3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,17 +1,7 @@ { "typescript.tsdk": "node_modules\\typescript\\lib", - "cSpell.words": [ - "KHTML", - "laruence", - "sina" - ], - "cSpell.ignorePaths": [ - "**/package-lock.json", - "**/node_modules/**", - "**/vscode-extension/**", - "**/.git/objects/**", - ".vscode", - "**/cookie.ts", - "src/config/app.ts" - ] -} + "editor.codeActionsOnSave": { + "source.organizeImports.biome": "explicit", + "source.fixAll.biome": "explicit" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..351270b --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,30 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "typescript", + "tsconfig": "tsconfig.json", + "option": "watch", + "problemMatcher": [ + "$tsc-watch" + ], + "group": "build", + "label": "tsc: 监视 - tsconfig.json" + }, + { + "label": "esbuild: watch", + "type": "shell", + "command": "node", + "args": [ + "esbuild.js", + ], + "group": { + "kind": "build", + "isDefault": true // 设为默认构建任务(Ctrl+Shift+B 直接运行) + }, + + "problemMatcher": [], // 不匹配问题(esbuild 错误会直接显示) + "detail": "使用 esbuild watch 打包项目" + } + ] +} \ No newline at end of file diff --git a/1.sh b/1.sh new file mode 100755 index 0000000..ce03dba --- /dev/null +++ b/1.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# 定义起始值和执行次数 +START_UID=1001 +DELAY=7200.0 +COUNT=10 # 执行的次数,可以根据需要修改 + +# 循环执行命令 +for ((i=0; i "./logs/$S_UID.log" 2>&1 & + # sleep 1 +done + +echo "所有实例已启动" diff --git a/biome.jsonc b/biome.jsonc new file mode 100644 index 0000000..96c2d63 --- /dev/null +++ b/biome.jsonc @@ -0,0 +1,58 @@ +{ + "$schema": "https://biomejs.dev/schemas/2.2.2/schema.json", + "vcs": { + "enabled": true, + "clientKind": "git", + "useIgnoreFile": true + }, + "files": { + "ignoreUnknown": true, + "includes": [ + "./src/**" + ] + }, + "formatter": { + "enabled": true, + "indentWidth": 2, + "indentStyle": "space", + "lineEnding": "lf", + "lineWidth": 120, + "attributePosition": "auto" + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true, + "suspicious": { + "noExplicitAny": "off" + }, + "correctness": { + "useExhaustiveDependencies": "off" + } + } + }, + "javascript": { + "globals": [ + "$", + "_" + ], + "formatter": { + "arrowParentheses": "asNeeded", + "bracketSameLine": false, + "bracketSpacing": true, + "jsxQuoteStyle": "double", + "quoteProperties": "asNeeded", + "semicolons": "asNeeded", + "trailingCommas": "all", + "quoteStyle": "single" + } + }, + "assist": { + "enabled": true, + "actions": { + "source": { + "organizeImports": "on" + } + } + } +} \ No newline at end of file diff --git a/esbuild.js b/esbuild.js new file mode 100644 index 0000000..bab83e4 --- /dev/null +++ b/esbuild.js @@ -0,0 +1,21 @@ +import * as esbuild from 'esbuild' + +let ctx = await esbuild.context({ + entryPoints: ['src/index.ts'], + tsconfig: 'tsconfig.json', + bundle: true, + minify: true, + sourcemap: false, + format: "esm", + charset: "utf8", + target: ['node22'], + platform: "node", + legalComments: "none", + outfile: 'dist/bundle.js', + banner: { + js: "import { createRequire as topLevelCreateRequire } from 'module'; const require = topLevelCreateRequire(import.meta.url);" + }, +}) + +await ctx.watch() +console.log('watching...') diff --git a/package.json b/package.json index 300cc4c..4270ae5 100644 --- a/package.json +++ b/package.json @@ -1,31 +1,24 @@ { "name": "spider", "version": "1.0.0", - "main": "index.js", "repository": "https://github.com/buffge/spider", - "author": "buffge <1515888956@qq.com>", + "author": "buffge ", "license": "MIT", - "private": false, + "private": true, + "type": "module", "dependencies": { - "delay": "^4.3.0", - "jquery": "^3.4.1", - "module-alias": "^2.2.2", - "moment": "^2.24.0", - "puppeteer-core": "^1.20.0" + "puppeteer-core": "^24.17.0" }, "devDependencies": { - "@types/jquery": "^3.3.31", - "@types/node": "^12.7.12", - "@types/puppeteer-core": "^1.9.0", - "typescript": "^3.6.4" + "@biomejs/biome": "^2.2.2", + "@types/node": "^22.18.0", + "esbuild": "^0.25.9", + "typescript": "^5.9.2" }, "scripts": { - "start": "node ./build/index" - }, - "_moduleAliases": { - "@": "./build", - "@interface": "./build/interface", - "@sites": "./build/sites", - "@conf": "./build/config" + "esbuild":"node esbuild.js", + "start": "node ./dist/bundle.js", + "fix": "biome check --fix .", + "lint": "biome check ." } } diff --git a/runtime/screenshot/.gitignore b/runtime/screenshot/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/runtime/screenshot/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/src/Spider.ts b/src/Spider.ts index 4a1d950..c70bbd4 100644 --- a/src/Spider.ts +++ b/src/Spider.ts @@ -1,21 +1,29 @@ -import ISpider, { ScreenShotConf, Mode } from "@/interface/Spider" -import { existsSync, mkdirSync } from "fs" -import * as os from "os" +import { existsSync, mkdirSync } from 'node:fs' import puppeteer, { - Browser, - DirectNavigationOptions, - LaunchOptions, - Page, - SetCookie, -} from "puppeteer-core" -import { pseudoChrome, scriptNameMapPath } from "./config/app" + type Browser, + type CookieData, + type GoToOptions, + type LaunchOptions, + type Page, +} from 'puppeteer-core' +import { pseudoChrome, scriptNameMapPath } from './config/app' +export interface ScreenShotConf { + // 存储路径 + path: string + // 截屏文件名 + fileName: string + // 全屏 + fullPage?: boolean +} +export type Mode = 'debug' | 'prod' declare let window: Window & { chrome: any } -export default class Spider implements ISpider { +export default class Spider { startTime?: Date pages: Page[] = [] browser?: Browser - constructor(mode: Mode = "prod") { - mode = mode + mode: Mode + constructor(mode: Mode = 'prod') { + this.mode = mode } start() {} async launch(opts: LaunchOptions) { @@ -31,37 +39,37 @@ export default class Spider implements ISpider { window.chrome = pseudoChrome const originalQuery = window.navigator.permissions.query window.navigator.permissions.query = parameters => - parameters.name === "notifications" + parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission } as PermissionStatus) : originalQuery(parameters) //插件这个不要写 // Object.defineProperty(navigator, 'plugins', { // get: () => [1, 2, 3, 4, 5], // }); - Object.defineProperty(navigator, "languages", { - get: () => ["en", "zh-CN", "zh"], + Object.defineProperty(navigator, 'languages', { + get: () => ['en', 'zh-CN', 'zh'], }) - Object.defineProperty(navigator, "language", { - get: () => ["en"], + Object.defineProperty(navigator, 'language', { + get: () => ['en'], }) - Object.defineProperty(navigator, "webdriver", { + Object.defineProperty(navigator, 'webdriver', { get: () => undefined, }) }) } async setRequestInterception(page: Page) { await page.setRequestInterception(true) - page.on("request", interceptedRequest => { - let reqUrl = interceptedRequest.url() - let suffix = "" - let urlSplitArr = reqUrl.split(".") + page.on('request', interceptedRequest => { + const reqUrl = interceptedRequest.url() + let suffix = '' + const urlSplitArr = reqUrl.split('.') if (urlSplitArr.length > 1) { suffix = urlSplitArr.pop() as string } - const disableSuffixArr = ["png", "jpg", "jpeg", "webp", "gif", "bmp", "woff", "woff2", "ttf"] + const disableSuffixArr = ['png', 'jpg', 'jpeg', 'webp', 'gif', 'bmp', 'woff', 'woff2', 'ttf'] if ( - interceptedRequest.resourceType() === "image" || - interceptedRequest.resourceType() === "font" || + interceptedRequest.resourceType() === 'image' || + interceptedRequest.resourceType() === 'font' || disableSuffixArr.includes(suffix) ) { interceptedRequest.abort() @@ -71,25 +79,24 @@ export default class Spider implements ISpider { }) } async setMaxViewport(page: Page, headless: boolean) { - let availWidth = - os.platform() === "win32" && !headless - ? await page.evaluate(() => { - console.log(screen.availWidth) - return screen.availWidth - }) - : 1920 + const [width, height] = !headless + ? await page.evaluate(() => { + console.log(screen) + return [screen.width, screen.height] + }) + : [2560, 1440] return await page.setViewport({ - width: availWidth, - height: 1080, + width: width, + height: height, }) } async screenShot(page: Page, conf: ScreenShotConf) { - const { fileName, fullPage, path } = conf + const { fullPage, path } = conf if (!existsSync(path)) { mkdirSync(path, { recursive: true }) } await page.screenshot({ - path: path + "/" + fileName + ".png", + // path: path + "/" + fileName + ".png", fullPage: !!fullPage, }) } @@ -98,16 +105,16 @@ export default class Spider implements ISpider { path: scriptNameMapPath[scriptName], }) } - async go(page: Page, url: string, opts: DirectNavigationOptions = {}) { + async go(page: Page, url: string, opts: GoToOptions = {}) { await page.goto(url, opts) } async shutdown() { if (!this.browser) { - throw new Error("browser not init!") + throw new Error('browser not init!') } await this.browser.close() } - async setCookie(page: Page, cookie: SetCookie[]) { - await page.setCookie(...cookie) + async setCookie(_: Page, cookie: CookieData[]) { + await this.browser?.setCookie(...cookie) } } diff --git a/src/app.ts b/src/app.ts new file mode 100644 index 0000000..8ff0178 --- /dev/null +++ b/src/app.ts @@ -0,0 +1,185 @@ +import { launchOpt, userAgent } from './config/app' +import Spider from './Spider' + +export default class App { + spider: Spider + site = 'https://www.xiaommx.cn' + constructor() { + this.spider = new Spider() + } + async start() { + await this.spider.launch(launchOpt) + const page = this.spider.pages[0] + page.on('console', msg => console.log('PAGE LOG:', msg.text())) + await this.spider.setUserAgent(page, userAgent) + await page.setJavaScriptEnabled(false) + await this.spider.go(page, `${this.site}/no-use/library-itch`, { + waitUntil: 'domcontentloaded', + }) + await this.spider.setMaxViewport(page, !!launchOpt.headless) + const content = await page.content() + const indexJsPath = await page.evaluate((content: string) => { + const res = /