《2021博客之星年度总评选》数据采集样例程序
- pom.xml
- 线上投票博客之星数据采集
- 投票贡献排行榜数据采集
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the data-collection sample programs.
  Dependencies: Selenium ChromeDriver bindings (page scraping) and
  Apache POI OOXML (writing .xlsx workbooks).
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>1</groupId>
    <artifactId>_psimplemvn</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- The Java sources contain non-ASCII string literals; pin the source
             encoding so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <!-- Explicit plugin version: avoids Maven's "plugin version is missing"
                     warning and keeps the build reproducible. -->
                <version>3.8.1</version>
                <configuration>
                    <!-- Compile for Java 8. -->
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <!-- Browser automation through a local chromedriver binary. -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-chrome-driver</artifactId>
            <version>4.0.0</version>
        </dependency>
        <!-- XSSF (.xlsx) workbook creation. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
    </dependencies>
</project>
线上投票博客之星数据采集
package simple.call.blogstar;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import simple.call.util.StringUtil;
import simple.call.util.TimeUtil;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Collects the candidate list from the blog-star voting page with Selenium/ChromeDriver
 * and exports one row per candidate to an .xlsx workbook via Apache POI.
 */
public class BlogStarStatisticsTest {
    /** Directory the workbook is written to (machine-specific; adjust before running). */
    private static final String OUTPUT_PATH = "/home/wangyetao/IdeaProjects/_psimplemvn/src/main/java/simple/call/blogstar/";
    private static final String FILE_NAME = "blog_star2020";
    private static final String SHEET_NAME = FILE_NAME.toUpperCase();
    private static final String SUFFIX = ".xlsx";
    private static final String URL = "https://bss.csdn.net/m/topic/blog_star2020";
    /** Header row; the order must match the cell order written in {@link #writeWorkbook(List)}. */
    private static final String[] HEADS = {
        "序号", "博客简称", "小头像url", "码龄(年)", "年度原创博文数", "当前票数", "录入时间"
    };

    /**
     * Entry point: scrapes the page, then writes the workbook.
     *
     * @param args unused
     * @throws InterruptedException if the fixed page-load wait is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        System.setProperty("webdriver.chrome.driver", "/usr/bin/chromedriver");
        List<BlogStar> blogStars = scrapeBlogStars();
        System.out.println("Creating excel");
        try {
            writeWorkbook(blogStars);
        } catch (IOException e) {
            // Covers FileNotFoundException too; do not report success after a failed export.
            e.printStackTrace();
            return;
        }
        System.out.println("Done");
    }

    /** Opens the voting page in Chrome and reads one BlogStar per list item under #blogList. */
    private static List<BlogStar> scrapeBlogStars() throws InterruptedException {
        ChromeDriver driver = new ChromeDriver();
        try {
            driver.get(URL);
            // Fixed wait to give the page time to render its list before querying it.
            Thread.sleep(3000);
            List<BlogStar> blogStars = new ArrayList<BlogStar>();
            for (WebElement element : driver.findElements(By.xpath("//*[@id=\"blogList\"]/li"))) {
                BlogStar blogStar = new BlogStar();
                blogStar.createTime = TimeUtil.getStampToString(System.currentTimeMillis(), "yyyy-MM-dd HH:mm:ss");
                blogStar.num = element.findElement(By.className("num")).getText();
                blogStar.name = element.findElement(By.className("name")).getText();
                blogStar.avatarUrl = element.findElement(By.tagName("img")).getAttribute("src");
                blogStar.intlevel = StringUtil.getInts(element.findElement(By.className("level")).getText())[0];
                // NOTE(review): takes the SECOND integer of the blog-num text, unlike the other
                // fields; presumably the first integer is not the post count - verify against the page.
                blogStar.intBlogNum = StringUtil.getInts(element.findElement(By.className("blog-num")).getText())[1];
                blogStar.intCurrentVote = StringUtil.getInts(element.findElement(By.className("current-vote")).getText())[0];
                blogStars.add(blogStar);
            }
            return blogStars;
        } finally {
            // quit() ends the whole driver session (not just the window), even when scraping fails.
            driver.quit();
        }
    }

    /** Builds the sheet (header row plus one row per entry) and writes it to the output file. */
    private static void writeWorkbook(List<BlogStar> blogStars) throws IOException {
        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
            XSSFSheet sheet = workbook.createSheet(SHEET_NAME);
            // Column widths are given in 1/256ths of a character: narrow first column,
            // wide last (timestamp) column, medium for the rest.
            for (int i = 0; i < HEADS.length; i++) {
                int chars = (i == 0) ? 6 : (i == HEADS.length - 1) ? 20 : 15;
                sheet.setColumnWidth(i, chars * 256);
            }
            Row header = sheet.createRow(0);
            for (int i = 0; i < HEADS.length; i++) {
                header.createCell(i).setCellValue(HEADS[i]);
            }
            int rowNum = 1;
            for (BlogStar blogStar : blogStars) {
                Row row = sheet.createRow(rowNum++);
                int colNum = 0;
                row.createCell(colNum++).setCellValue(blogStar.num);
                row.createCell(colNum++).setCellValue(blogStar.name);
                row.createCell(colNum++).setCellValue(blogStar.avatarUrl);
                row.createCell(colNum++).setCellValue(blogStar.intlevel);
                row.createCell(colNum++).setCellValue(blogStar.intBlogNum);
                row.createCell(colNum++).setCellValue(blogStar.intCurrentVote);
                row.createCell(colNum++).setCellValue(blogStar.createTime);
            }
            // Open the file only once the sheet is complete, and always close it.
            try (FileOutputStream outputStream = new FileOutputStream(OUTPUT_PATH + FILE_NAME + SUFFIX)) {
                workbook.write(outputStream);
            }
        }
    }
}
投票贡献排行榜数据采集
package simple.call.blogstar;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import simple.call.util.StringUtil;
import simple.call.util.TimeUtil;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Collects the vote-contribution leaderboard from one candidate's detail page with
 * Selenium/ChromeDriver and exports one row per voter to an .xlsx workbook via Apache POI.
 */
public class BlogStarStatisticsVoteLeaderboardList {
    /** Directory the workbook is written to (machine-specific; adjust before running). */
    private static final String OUTPUT_PATH = "/home/wangyetao/IdeaProjects/_psimplemvn/src/main/java/simple/call/blogstar/";
    private static final String FILE_NAME = "aa518189.xlsx";
    private static final String URL = "https://bss.csdn.net/m/topic/blog_star2020/detail?username=aa518189";
    /** Header row; the order must match the cell order written in {@link #writeWorkbook(List)}. */
    private static final String[] HEADS = {"编号", "博粉名称", "码龄(年)", "支持票数", "录入时间"};

    /**
     * Entry point: scrapes the leaderboard, then writes the workbook.
     *
     * @param args unused
     * @throws InterruptedException if the fixed page-load wait is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        System.setProperty("webdriver.chrome.driver", "/usr/bin/chromedriver");
        List<BlogStar> blogVotes = scrapeVoteLeaderboard();
        System.out.println("Creating excel");
        try {
            writeWorkbook(blogVotes);
        } catch (IOException e) {
            // Covers FileNotFoundException too; do not report success after a failed export.
            e.printStackTrace();
            return;
        }
        System.out.println("Done");
    }

    /** Opens the detail page in Chrome and reads one BlogStar per item under #voteLeaderboardList. */
    private static List<BlogStar> scrapeVoteLeaderboard() throws InterruptedException {
        ChromeDriver driver = new ChromeDriver();
        try {
            driver.get(URL);
            // Fixed wait to give the page time to render its list before querying it.
            Thread.sleep(2000);
            List<BlogStar> blogVotes = new ArrayList<BlogStar>();
            for (WebElement element : driver.findElements(By.xpath("//*[@id=\"voteLeaderboardList\"]/li"))) {
                BlogStar blogStar = new BlogStar();
                blogStar.createTime = TimeUtil.getStampToString(System.currentTimeMillis(), "yyyy-MM-dd HH:mm:ss");
                blogStar.num = element.findElement(By.className("num")).getText();
                blogStar.name = element.findElement(By.className("text")).getText();
                blogStar.intlevel = StringUtil.getInts(element.findElement(By.className("code-age")).getText())[0];
                blogStar.intCurrentVote = StringUtil.getInts(element.findElement(By.className("vote-num")).getText())[0];
                blogVotes.add(blogStar);
            }
            return blogVotes;
        } finally {
            // quit() ends the whole driver session (not just the window), even when scraping fails.
            driver.quit();
        }
    }

    /** Builds the sheet (header row plus one row per entry) and writes it to the output file. */
    private static void writeWorkbook(List<BlogStar> blogVotes) throws IOException {
        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
            XSSFSheet sheet = workbook.createSheet("BLOGVOTES");
            // Column widths are given in 1/256ths of a character: narrow first column,
            // wide last (timestamp) column, medium for the rest.
            for (int i = 0; i < HEADS.length; i++) {
                int chars = (i == 0) ? 6 : (i == HEADS.length - 1) ? 20 : 15;
                sheet.setColumnWidth(i, chars * 256);
            }
            Row header = sheet.createRow(0);
            for (int i = 0; i < HEADS.length; i++) {
                header.createCell(i).setCellValue(HEADS[i]);
            }
            int rowNum = 1;
            for (BlogStar blogStar : blogVotes) {
                Row row = sheet.createRow(rowNum++);
                int colNum = 0;
                row.createCell(colNum++).setCellValue(blogStar.num);
                row.createCell(colNum++).setCellValue(blogStar.name);
                row.createCell(colNum++).setCellValue(blogStar.intlevel);
                row.createCell(colNum++).setCellValue(blogStar.intCurrentVote);
                row.createCell(colNum++).setCellValue(blogStar.createTime);
            }
            // Open the file only once the sheet is complete, and always close it.
            try (FileOutputStream outputStream = new FileOutputStream(OUTPUT_PATH + FILE_NAME)) {
                workbook.write(outputStream);
            }
        }
    }
}