proxy manager used to scrape data
```sh
npm install @letsscrapedata/proxy
```
Examples
`typescript
import { ProxyManager, GeneralAccount, AddPackageData, ProxyRequirements } from "@letsscrapedata/proxy";

// Minimal promise-based delay used near the end of this example;
// nothing imported above provides a `sleep` helper.
const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));

// File that new proxies are read from when the account refreshes.
const filename = "isp-proxies.txt";

const generalAccount = new GeneralAccount();

// Description of the proxy package whose proxies are listed in `filename`.
const addPackageData: AddPackageData = {
  proxyIpType: "isp",
  proxyDurationType: "static",
  proxySharedType: "dedicated",
  billingModel: "period",
  // NOTE(review): spelling of this value is kept exactly as in the original example; confirm the value the library expects.
  vendorName: "unkown", // important for some proxy vendors
  // priority: 10, // the smaller the value, the higher the priority
  maxProxyDuration: 3600 * 24, // seconds
  // maxSessionDuraion: 3600, // for rotating proxies
  maxUsersPerIp: 4,
  maxConcurrencyPerUser: 10,
  bandwidth: 0, // unknown
  filename,
};

// update() is declared as returning a Promise in ProxyAccountManager, so await it
// to make sure the package has been added before refreshing starts.
await generalAccount.update("addPackage", addPackageData);

// add new proxies from filename regularly
await generalAccount.startRefresh();

const proxyManager = new ProxyManager();
proxyManager.addProxyAccount(generalAccount);
// more ProxyAccounts of different types can be added here

// get proxies according to requirements
const proxyRequirements: ProxyRequirements = {
  proxyIpSharedTypes: ["rd", "id"], // residential-dedicated or isp-dedicated proxy
  minProxyValidSeconds: 3600,
};
const proxies = await proxyManager.getProxies(proxyRequirements, 2);

// Wait 20 seconds before shutting the manager down.
await sleep(20000);
await proxyManager.close();
`
interface ProxyAccountManager
A new ProxyAccountManager implementation should provide information that is as complete and accurate as possible, so that the crawler scheduler can accurately determine whether a proxy is applicable and can control the number of users and the access concurrency.
`typescript
/**
 * Proxy account manager: manages the proxies that an account has purchased from a proxy vendor.
 *
 * Basic concepts:
 * - ProxyAccount: an account that usually manages the proxies provided by one proxy vendor.
 *   A proxyAccount may purchase 0 or many proxyPackages. A joint proxy account manages proxies provided by many vendors.
 * - ProxyPackage: a package that you buy from a proxy vendor. Each proxy package usually contains
 *   many proxies of the same type, which will expire later.
 * - Proxy: a network proxy that can be used to scrape data.
 * - NewProxyEvent: event emitted when new proxies are added.
 *
 * NOTE(review): a bare `Promise` return type does not compile (`Promise<T>` needs a type argument).
 * The type arguments below were reconstructed from the method documentation and from the
 * `boolean` results of the synchronous methods; confirm them against the implementations.
 */
export interface ProxyAccountManager extends EventEmitter {
  /**
   * Each type of proxyAccountManager decides which parameters are required or optional.
   * The constructor should throw an error if the parameters are invalid.
   */
  // new(options?: ProxyAccountManagerOptions): ProxyAccountManager;

  /**
   * Set new options of this manager. Each type of proxyAccountManager decides which options can be updated.
   * @param options
   */
  setOptions(options: ProxyAccountManagerOptions): boolean;

  /**
   * Start to refresh proxies:
   * - refresh once immediately if options.refreshNow is true
   * - refresh periodically if refreshIntervalSecs of ProxyAccountManager is greater than 0
   * @param options
   */
  startRefresh(options: RefreshOptions): Promise<boolean>; // NOTE(review): result type assumed to be boolean — confirm

  /**
   * Stop refreshing periodically.
   */
  stopRefresh(): boolean;

  /**
   * Get proxy packages that meet the conditions.
   * Used only by ProxyManager.
   * @param proxyRequirements
   */
  _getProxyPackages(proxyRequirements: ProxyRequirements): ProxyPackageInfo[];

  /**
   * Get proxies, which meet the conditions, from a package.
   * Used only by ProxyManager; users should use getProxies().
   * @param proxyPackageInfo the package to get proxies from
   * @param proxyRequirements
   * @param proxyNum default 1
   * @param onlyApplied default false, whether to get proxies only from applied proxies
   * @param onlyApply default false; false - apply and allocate, true - apply and not allocate
   */
  _getProxiesFromPackage(
    proxyPackageInfo: ProxyPackageInfo,
    proxyRequirements: ProxyRequirements,
    proxyNum?: number,
    onlyApplied?: boolean,
    onlyApply?: boolean
  ): Promise<Proxy[]>;

  /**
   * Get the applied proxies that meet proxyRequirements.
   * @param proxyRequirements
   * @param proxyNum default 0, <=0: all, >0: the number of proxies to get
   * @param onlyApply default false; false - apply and allocate, true - apply and not allocate
   */
  // getAppliedProxies(proxyRequirements: ProxyRequirements, proxyNum?: number, onlyApply?: boolean): Promise<Proxy[]>;

  /**
   * Get proxies that meet the conditions, can be used now, and have the highest priority
   * (and the minimum expireTime if the priority is the same).
   * - The number of proxies returned may be less than proxyNum.
   * - Returns [] if there are no required proxies.
   * - Throws an error if there is an exception.
   * @param proxyRequirements
   * @param proxyNum default 1, the number of proxies to get
   * @param onlyApplied default false, whether to get proxies only from applied proxies
   */
  getProxies(proxyRequirements: ProxyRequirements, proxyNum?: number, onlyApplied?: boolean): Promise<Proxy[]>;

  /**
   * Discard a proxy that is expired or invalid. This proxy will not be used again.
   * @param proxy
   */
  discardProxy(proxy: Proxy): boolean;

  /**
   * Free a busy proxy for later use.
   * @param proxy
   */
  freeProxy(proxy: Proxy): boolean;

  /**
   * Lock a proxy that cannot be used now but can be used again later (usually used to lock a static proxy):
   * - free this proxy if it is busy (in use)
   * - lock this proxy by updating lastAbnormalTime of this proxy (status of the proxy is "idle")
   *
   * Please don't lock a proxy if it is used to access many different websites.
   * @param proxy
   */
  lockProxy(proxy: Proxy): boolean;

  /**
   * Optional method, launched manually and used by some types of proxyAccountManager;
   * for example, GeneralAccount uses this to add a new package or add new proxies.
   * It's best to refresh periodically via the startRefresh method.
   * Use this method only when it is required.
   * @param opType
   * @param data
   */
  update(opType?: string, data?: object): Promise<boolean>; // NOTE(review): result type assumed to be boolean — confirm

  /**
   * Adjust the priority of packages.
   * @param priority
   * @param packageNames
   */
  adjustPriorityOfPackages(priority: number, packageNames: PackageName[]): boolean;

  /**
   * Enable packages.
   * @param packageNames
   */
  enablePackages(packageNames: PackageName[]): boolean;

  /**
   * Disable packages.
   * @param packageNames
   */
  disablePackages(packageNames: PackageName[]): boolean;

  /**
   * Enable this proxyAccount.
   */
  enable(): boolean;

  /**
   * Disable this proxyAccount.
   */
  disable(): boolean;

  // Accessors for the state held by this manager.
  proxyAccountId(): ProxyAccountId;
  status(): ProxyAccountStatus;
  proxyAccount(): ProxyAccount;
  proxyPackages(): ProxyPackage[];
  proxies(): Proxy[];

  /**
   * Close this proxy account manager (proxyAccount); it cannot be used again afterwards.
   */
  close(): Promise<boolean>; // NOTE(review): result type assumed to be boolean — confirm
}
``