Integrating sitemapgen4j with Spring Boot to generate a website sitemap
1. What is sitemapgen4j?
sitemapgen4j is a Java library for generating XML sitemaps, for example the sitemap of a website. When the number of URLs exceeds 50,000, the remaining URLs have to be written to an additional sitemap file; sitemapgen4j already implements this splitting for us, so there is no need to worry about it.
sitemap
Sitemaps are a simple way for webmasters to tell search engines about the pages on their site that are available for crawling. In its simplest form, a sitemap is an XML file that lists the site's URLs together with additional metadata about each URL (when it was last updated, how often it usually changes, and how important it is relative to the site's other URLs) so that search engines can crawl the site more intelligently. Web crawlers usually discover pages through links within a website and from other websites. Sitemaps supplement this data, allowing sitemap-enabled crawlers to pick up all the URLs in the sitemap and to use the associated metadata to understand them. Using the Sitemap protocol does not guarantee that a page will be indexed by search engines, but it does give web crawlers hints that help them crawl your site better. Sitemap 0.90 is offered under the terms of the Attribution-ShareAlike Creative Commons License and is widely adopted, with support from Google, Yahoo!, and Microsoft. In short, a sitemap is a simple way for webmasters to notify search engines of the pages on their site that are available for crawling: an XML file listing the site's URLs and per-URL metadata (last update time, usual change frequency, and relative importance) so that search engines can crawl the site more intelligently.
2. Code
Goal: generate a sitemap for a website.
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven module for the sitemap demo. -->
<!-- Inherits from the springboot-demo parent POM; the Spring Boot dependencies below declare no
     version of their own, so their versions are presumably managed by that parent. -->
<parent>
<artifactId>springboot-demo</artifactId>
<groupId>com.et</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>sitemap</artifactId>
<properties>
<!-- Compile for Java 8 source and bytecode level. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- Spring Boot web starter. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Boot auto-configuration support. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-autoconfigure</artifactId>
</dependency>
<!-- Test support; only on the test classpath. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- sitemapgen4j: the sitemap generation library this module demonstrates. -->
<dependency>
<groupId>com.github.dfabulich</groupId>
<artifactId>sitemapgen4j</artifactId>
<version>1.1.1</version>
</dependency>
</dependencies>
</project>
application.yaml
server:
  port: 8088
job
package com.et.sitemap.job;
import com.redfin.sitemapgenerator.SitemapIndexGenerator;
import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.WebSitemapGenerator;
import com.redfin.sitemapgenerator.WebSitemapUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.File;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Scheduled job that writes XML sitemaps for the site's article, tag and type pages,
 * followed by a sitemap index file that references every generated sitemap.
 *
 * @author liuhaihua
 * @version 1.0
 * @date 2024-04-25 17:44
 */
@Component
public class SiteMapJob {
    private static final Logger log = LoggerFactory.getLogger(SiteMapJob.class);

    /** Directory that receives the generated sitemap files. */
    private static final String OUTPUT_DIR = "D://tmp/";
    /** Base URL that every sitemap entry is built from. */
    private static final String DOMAIN = "http://www.liuhaihua.cn";
    /** Name of the sitemap index file written into {@link #OUTPUT_DIR}. */
    private static final String INDEX_FILE_NAME = "sitemap_index.xml";

    /** Number of demo article URLs (ids 1..159999). */
    private static final int ARTICLE_COUNT = 159999;
    /** Number of demo tag URLs (ids 1..20). */
    private static final int TAG_COUNT = 20;
    /** Number of demo type URLs (ids 1..20). */
    private static final int TYPE_COUNT = 20;

    /**
     * Generates the article, tag and type sitemaps and then the sitemap index.
     *
     * <p>Runs one second after startup and every ten seconds afterwards; the commented-out cron
     * expression below is the once-a-day alternative. Failures are logged instead of being
     * printed to stderr, and the run is abandoned if the output directory cannot be created.
     */
    //@Scheduled(cron = "0 0 0 * * ?")
    @Scheduled(initialDelay = 1000,fixedRate = 10000)
    public void generateSitemap() {
        log.info("start generate sitemap");
        File outputDir = new File(OUTPUT_DIR);
        // mkdirs() reports failure through its return value only, so check it explicitly.
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            log.error("cannot create sitemap output directory {}", outputDir.getAbsolutePath());
            return;
        }
        try {
            // Collect the names of every sitemap file so the index can reference them.
            List<String> fileNames = new ArrayList<>();
            collectNames(writeSection(outputDir, "article", ARTICLE_COUNT), fileNames);
            collectNames(writeSection(outputDir, "tag", TAG_COUNT), fileNames);
            collectNames(writeSection(outputDir, "type", TYPE_COUNT), fileNames);

            W3CDateFormat dateFormat = new W3CDateFormat(W3CDateFormat.Pattern.DAY);
            SitemapIndexGenerator sitemapIndexGenerator = new SitemapIndexGenerator
                    .Options(DOMAIN, new File(outputDir, INDEX_FILE_NAME))
                    .dateFormat(dateFormat)
                    .autoValidate(true)
                    .build();
            for (String fileName : fileNames) {
                try {
                    sitemapIndexGenerator.addUrl(DOMAIN + "/" + fileName);
                } catch (MalformedURLException e) {
                    // Skip the offending entry but keep building the rest of the index.
                    log.error("skipping invalid sitemap index entry for file {}", fileName, e);
                }
            }
            // generate sitemap_index file
            sitemapIndexGenerator.write();
            log.info("end generate sitemap");
        } catch (MalformedURLException e) {
            log.error("failed to generate sitemap for {}", DOMAIN, e);
        }
    }

    /**
     * Writes the sitemap file(s) for one section of the site.
     *
     * @param outputDir directory the sitemap files are written to
     * @param section   used both as the file name prefix and as the URL path segment
     * @param count     number of URLs to add, with ids running from 1 to {@code count} inclusive
     * @return the sitemap files written for this section
     * @throws MalformedURLException if the base URL or one of the page URLs is not a valid URL
     */
    private List<File> writeSection(File outputDir, String section, int count)
            throws MalformedURLException {
        WebSitemapGenerator generator = WebSitemapGenerator.builder(DOMAIN, outputDir)
                .fileNamePrefix(section)
                .build();
        // One timestamp per section, shared by all of its URLs.
        Date lastModified = new Date();
        for (int id = 1; id <= count; id++) {
            WebSitemapUrl url = new WebSitemapUrl.Options(DOMAIN + "/" + section + "/" + id)
                    .lastMod(lastModified)
                    .build();
            generator.addUrl(url);
        }
        return generator.write();
    }

    /** Appends the simple name of each file in {@code files} to {@code target}. */
    private static void collectNames(List<File> files, List<String> target) {
        for (File sitemapFile : files) {
            target.add(sitemapFile.getName());
        }
    }
}
DemoApplication.java
package com.et.sitemap;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Entry point of the sitemap demo. Scheduling is enabled here so that methods annotated with
 * {@code @Scheduled} are executed.
 */
@EnableScheduling
@SpringBootApplication
public class DemoApplication {

    /**
     * Boots the Spring application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(DemoApplication.class);
        application.run(args);
    }
}
The above shows only the key parts of the code. For the complete code, please see the code repository below.
code repository
3. Test
- Start the Spring Boot application.
- Check the generated sitemap files in the output directory (D://tmp/).
