list变String[],js sleep

本文介绍了几种实用的编程技巧,包括将List转换为字符数组、遍历Map集合、字符串的替换与截取方法,以及在JavaScript中实现自定义的sleep功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1. list变字符数组:String[] s = list.toArray(new String[list.size()]);
2. map迭代,变set迭代  
  Set set = map.keySet() ; 
  Iterator iterator  = set.iterator();
while(iterator.hasNext()){
row.createCell(n).setCellValue((String)iterator.next());
n++;
}
3. 替换与截取
// Two ways to strip the surrounding brackets from "[x,y,x]".
String s = "[x,y,x]";

// 1) Replace: the regex matches either bracket character and removes it.
String ss = s.replaceAll("\\[|\\]", "");
// Print before reusing ss, otherwise the replace result is never shown.
System.out.println(ss);

// 2) Substring: drop the first and the last character.
ss = s.substring(1, s.length() - 1);
System.out.println(ss);

// Cutting a known suffix off a field name: "title.raw" -> "title".
String suffix = ".raw";
String field = "title" + suffix;
ss = field.substring(0, field.length() - suffix.length());
System.out.println(ss);

4.js中没有自带的sleep方法,要想休眠要自己定义个方法

/**
 * Blocks the calling thread for roughly the given number of milliseconds.
 *
 * JavaScript has no built-in synchronous sleep, so this spins on the clock
 * until the deadline has passed. Nothing else can run on the thread while it
 * waits.
 *
 * @param {number} numberMillis how long to wait, in milliseconds; values that
 *     are missing, non-numeric, non-finite, zero or negative return at once
 */
function sleep(numberMillis) {
    var millis = Number(numberMillis);
    // A NaN deadline can never be exceeded, so an invalid argument would
    // otherwise spin forever.
    if (!isFinite(millis) || millis <= 0) {
        return;
    }
    var exitTime = new Date().getTime() + millis;
    // Busy-wait until the clock is strictly past the deadline.
    while (new Date().getTime() <= exitTime) {
        // intentionally empty
    }
}

import org.openqa.selenium.*; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; public class JobCrawlerWithPhone { // 请将此路径修改为你电脑上ChromeDriver的实际路径 private static final String CHROME_DRIVER_PATH = "D:/ChromeCoreDownloads/chromedriver-win64/chromedriver.exe"; // Windows示例 // private static final String CHROME_DRIVER_PATH = "/usr/local/bin/chromedriver"; // Linux/Mac示例 private static final String JOBSTREET_URL = "https://sg.jobstreet.com/jobs"; private static final String BING_MAPS_URL = "https://cn.bing.com/maps"; private static final String[] USER_AGENTS = { "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0", "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1", "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36" }; private static final Random random = new Random(); private WebDriver driver; private WebDriverWait wait; private Map<String, String> phoneCache = new HashMap<>(); // 缓存公司电话 public static void main(String[] args) { // 设置ChromeDriver路径 System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH); JobCrawlerWithPhone crawler = new JobCrawlerWithPhone(); try { List<JobInfo> jobs = crawler.crawlJobs(2); // 爬取2页作为示例 for (JobInfo job : jobs) { 
System.out.println("Title: " + job.getTitle()); System.out.println("Company: " + job.getCompany()); System.out.println("Phone: " + job.getPhone()); System.out.println("URL: " + job.getUrl()); System.out.println("Date Posted: " + job.getDatePosted()); System.out.println("Location: " + job.getLocation()); System.out.println("Salary: " + job.getSalary()); System.out.println("----------------------------------"); } } finally { crawler.quitDriver(); } } public JobCrawlerWithPhone() { // 初始化Chrome浏览器设置 ChromeOptions options = new ChromeOptions(); // 添加随机User-Agent options.addArguments("--user-agent=" + getRandomUserAgent()); // 禁用自动化控制特征,避免被网站检测 options.addArguments("--disable-blink-features=AutomationControlled"); // 无头模式(注释掉可看到浏览器操作) // options.addArguments("--headless"); options.addArguments("--disable-gpu"); options.addArguments("--window-size=1920,1080"); options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); // 初始化WebDriver driver = new ChromeDriver(options); // 设置等待超时 wait = new WebDriverWait(driver, 15); // 15秒超时 // 设置页面加载超时(秒) driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS); } public List<JobInfo> crawlJobs(int maxPages) { List<JobInfo> jobs = new ArrayList<>(); int page = 1; boolean hasNextPage = true; while (hasNextPage && page <= maxPages) { String url = page == 1 ? 
JOBSTREET_URL : JOBSTREET_URL + "?page=" + page; System.out.println("Crawling page: " + page + " - " + url); try { // 随机延迟避免请求过快 (1-5秒) int delay = random.nextInt(4000) + 1000; TimeUnit.MILLISECONDS.sleep(delay); // 导航到页面 driver.get(url); // 等待页面加载完成 - 等待职位卡片出现 wait.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("div[data-search-sol-meta]"))); // 随机滚动页面,模拟人类行为 scrollRandomly(); // 提取职位卡片 List<WebElement> jobCards = driver.findElements(By.cssSelector("div[data-search-sol-meta]")); System.out.println("Found " + jobCards.size() + " job cards on page " + page); for (WebElement card : jobCards) { JobInfo job = extractJobInfo(card); if (job != null) { // 查询公司电话 String phone = getCompanyPhone(job.getCompany()); job.setPhone(phone); jobs.add(job); // 每处理3个职位休息一下 if (jobs.size() % 3 == 0) { TimeUnit.SECONDS.sleep(2); } } } // 检查是否有下一页 hasNextPage = hasNextPage(); page++; } catch (Exception e) { System.err.println("Error crawling page " + page + ": " + e.getMessage()); hasNextPage = false; } } System.out.println("Total jobs crawled: " + jobs.size()); return jobs; } private JobInfo extractJobInfo(WebElement card) { try { JobInfo job = new JobInfo(); // 提取职位标题 WebElement titleElement = card.findElement(By.cssSelector("[data-automation=jobTitle]")); job.setTitle(titleElement.getText()); // 提取公司名称 WebElement companyElement = card.findElement(By.cssSelector("[data-automation=jobCompany]")); job.setCompany(companyElement.getText()); // 提取职位链接 WebElement linkElement = card.findElement(By.cssSelector("a[data-automation=job-list-view-job-link]")); job.setUrl(linkElement.getAttribute("href")); // 提取职位发布日期 try { WebElement dateElement = card.findElement(By.cssSelector("._1noz3r80")); job.setDatePosted(dateElement.getText()); } catch (NoSuchElementException e) { job.setDatePosted("N/A"); } // 提取职位位置 try { WebElement locationElement = card.findElement(By.cssSelector("[data-automation=jobLocation]")); job.setLocation(locationElement.getText()); } catch 
(NoSuchElementException e) { job.setLocation("N/A"); } // 提取薪资信息 try { WebElement salaryElement = card.findElement(By.cssSelector("[data-automation=jobSalary]")); job.setSalary(salaryElement.getText()); } catch (NoSuchElementException e) { job.setSalary("N/A"); } return job; } catch (Exception e) { System.err.println("Error extracting job info: " + e.getMessage()); return null; } } private String getCompanyPhone(String companyName) { // 检查缓存 if (phoneCache.containsKey(companyName)) { return phoneCache.get(companyName); } System.out.println("Searching phone for: " + companyName); try { // 随机延迟避免请求过快 (1-3秒) int delay = random.nextInt(2000) + 1000; TimeUnit.MILLISECONDS.sleep(delay); // 打开必应地图 driver.get(BING_MAPS_URL); // 等待搜索框出现 wait.until(ExpectedConditions.visibilityOfElementLocated(By.id("maps_sb"))); // 输入公司名称 WebElement searchBox = driver.findElement(By.id("maps_sb")); searchBox.clear(); searchBox.sendKeys(companyName); // 点击搜索按钮 WebElement searchButton = driver.findElement(By.cssSelector(".searchIcon")); searchButton.click(); // 等待搜索结果出现 wait.until(ExpectedConditions.visibilityOfElementLocated(By.cssSelector(".entityCardSection"))); // 随机滚动页面 scrollRandomly(); // 尝试获取电话信息 String phone = findPhoneOnPage(); // 缓存结果 phoneCache.put(companyName, phone != null ? 
phone : "Not Found"); return phone; } catch (Exception e) { System.err.println("Error searching phone for " + companyName + ": " + e.getMessage()); return "Error: " + e.getMessage(); } } private String findPhoneOnPage() { try { // 尝试从实体卡片中获取电话 List<WebElement> phoneElements = driver.findElements(By.cssSelector(".entityCard-phone")); if (!phoneElements.isEmpty()) { return phoneElements.get(0).getText(); } // 尝试从侧边栏获取电话 List<WebElement> sideElements = driver.findElements(By.cssSelector(".directionsInfoPanel .phone")); if (!sideElements.isEmpty()) { return sideElements.get(0).getText(); } // 尝试从信息面板获取 List<WebElement> infoElements = driver.findElements(By.cssSelector(".infocardContainer .phone")); if (!infoElements.isEmpty()) { return infoElements.get(0).getText(); } // 尝试匹配电话号码模式 Pattern phonePattern = Pattern.compile("\\+?\\d[\\d\\s()-]{5,}\\d"); String pageSource = driver.getPageSource(); Matcher matcher = phonePattern.matcher(pageSource); if (matcher.find()) { return matcher.group(); } return "Phone not found"; } catch (Exception e) { return "Error: " + e.getMessage(); } } private boolean hasNextPage() { try { // 检查是否有下一页按钮且可点击 WebElement nextPageButton = wait.until( ExpectedConditions.elementToBeClickable(By.cssSelector("a[aria-label=Go to next page]")) ); return nextPageButton.isEnabled() && nextPageButton.isDisplayed(); } catch (Exception e) { return false; } } private String getRandomUserAgent() { return USER_AGENTS[random.nextInt(USER_AGENTS.length)]; } // 随机滚动页面,模拟人类浏览行为 private void scrollRandomly() throws InterruptedException { JavascriptExecutor js = (JavascriptExecutor) driver; // 获取页面高度 long pageHeight = (long) js.executeScript("return document.body.scrollHeight"); // 随机滚动几次 int scrollTimes = random.nextInt(3) + 2; // 2-4次滚动 for (int i = 0; i < scrollTimes; i++) { // 随机滚动到页面的某个位置 long scrollTo = (long) (Math.random() * pageHeight * 0.8) + (long) (pageHeight * 0.1); js.executeScript("window.scrollTo(0, " + scrollTo + ")"); // 每次滚动后随机等待 int scrollDelay = 
random.nextInt(1000) + 500; // 500-1500毫秒 TimeUnit.MILLISECONDS.sleep(scrollDelay); } // 滚动到顶部 js.executeScript("window.scrollTo(0, 0)"); TimeUnit.MILLISECONDS.sleep(500); } // 退出浏览器 public void quitDriver() { if (driver != null) { driver.quit(); } } } class JobInfo { private String title; private String company; private String phone; private String url; private String datePosted; private String location; private String salary; // Getters and setters public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getCompany() { return company; } public void setCompany(String company) { this.company = company; } public String getPhone() { return phone; } public void setPhone(String phone) { this.phone = phone; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public String getDatePosted() { return datePosted; } public void setDatePosted(String datePosted) { this.datePosted = datePosted; } public String getLocation() { return location; } public void setLocation(String location) { this.location = location; } public String getSalary() { return salary; } public void setSalary(String salary) { this.salary = salary; } }查询电话部分修改,搜索后找到class为b_vPanel的最后一个元素的内容为电话
最新发布
08-01
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值