在当今数字化高速发展的时代,数据规模呈现爆炸式增长的态势。无论是电子商务平台的海量交易记录,社交媒体中的海量用户动态,还是企业级应用中的复杂业务数据,其数量都极为庞大。在这样的背景下,如何高效地处理和存储海量数据成为应用开发中至关重要的挑战。
批量插入数据是众多系统中频繁出现的操作场景,特别是在数据初始化、数据迁移或者高并发写入等情境下。然而,当面对百万级甚至更庞大的数据量时,传统的逐个插入方式往往由于性能瓶颈,导致系统响应迟缓,甚至可能出现超时或崩溃的状况。
为了有效应对这一严峻挑战,我们必须采用更为高效的技术和架构策略。Spring Boot 作为强大且成熟的开发框架,为构建稳定可靠的应用奠定了坚实基础。MyBatis-Plus 在数据操作方面提供了便捷高效的途径。而 ThreadPoolTaskExecutor 能够充分发挥多核 CPU 的优势,借助并发处理大幅提升数据插入的速度。
通过将这三者有机结合,我们能够构建一个快速、稳定且能够处理百万级数据批量插入的系统,满足业务对于数据处理的高性能和高可靠性要求。这不仅能够显著提升系统的整体性能,还能为用户带来更为流畅的使用体验,增强系统在激烈市场竞争中的核心竞争力。
MySQL DDL 语句
-- Target table for the batch-insert demo: an auto-incrementing key plus two text columns.
CREATE TABLE `data_table` (
    `id`          BIGINT(20) AUTO_INCREMENT,
    `name`        VARCHAR(255),
    `description` VARCHAR(255),
    -- Same key as the inline form, declared as a table-level constraint.
    PRIMARY KEY (`id`)
);
项目创建及依赖配置(pom.xml)
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.0.0</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.icoderoad</groupId>
    <artifactId>batch-insertion</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>Batch Insertion</name>
    <properties>
        <java.version>17</java.version>
    </properties>
    <dependencies>
        <!-- Spring Boot web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- MyBatis-Plus starter -->
        <dependency>
            <groupId>com.baomidou</groupId>
            <artifactId>mybatis-plus-boot-starter</artifactId>
            <version>3.5.3.1</version>
        </dependency>
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>com.mysql</groupId>
            <artifactId>mysql-connector-j</artifactId>
            <scope>runtime</scope>
        </dependency>
        <!-- Lombok: the entity class relies on @Data to generate its accessors.
             The version is managed by spring-boot-starter-parent. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- Spring modules used for the thread pool (ThreadPoolTaskExecutor) and transactions -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-tx</artifactId>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
配置文件(application.yml)
spring:
  datasource:
    # rewriteBatchedStatements=true lets Connector/J send a JDBC batch of INSERTs
    # as multi-row statements instead of one round trip per row.
    url: jdbc:mysql://localhost:3306/db_name?useUnicode=true&characterEncoding=UTF-8&useSSL=false&rewriteBatchedStatements=true
    username: username
    password: password
    driver-class-name: com.mysql.cj.jdbc.Driver
  # Custom keys read through @Value in ExecutorConfig (spring.task.executor.*).
  task:
    executor:
      core-pool-size: 100
      # NOTE(review): a thread pool only grows past its core size once the queue is
      # full, so with a queue this large max-pool-size is unlikely to be reached.
      max-pool-size: 300
      queue-capacity: 99999
实体类
package com.icoderoad.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
/**
 * Persistent object mapped to the {@code data_table} table.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by
 * Lombok's {@code @Data}.
 */
@TableName("data_table")
@Data
public class DataEntity {

    /** Value of the {@code id} column. */
    private Long id;

    /** Value of the {@code name} column. */
    private String name;

    /** Value of the {@code description} column. */
    private String description;
}
Mapper 接口
package com.icoderoad.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.icoderoad.entity.DataEntity;
/**
 * MyBatis-Plus mapper for {@link DataEntity}.
 *
 * <p>Declares no methods of its own; every operation is inherited from
 * {@link BaseMapper}.
 */
public interface DataMapper extends BaseMapper<DataEntity> {
    // Intentionally empty.
}
DataService 接口类
package com.icoderoad.service;
import java.util.List;
import com.icoderoad.entity.DataEntity;
/**
 * Service contract for writing {@link DataEntity} records in bulk.
 */
public interface DataService {

    /**
     * Inserts the supplied entities as a batch operation.
     *
     * @param dataList the entities to insert
     */
    void batchInsertData(List<DataEntity> dataList);
}
服务类
package com.icoderoad.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.icoderoad.entity.DataEntity;
import com.icoderoad.mapper.DataMapper;
import com.icoderoad.service.DataService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
@Service
public class DataServiceImpl extends ServiceImpl<DataMapper, DataEntity> implements DataService {
@Autowired
private ThreadPoolTaskExecutor taskExecutor;
@Transactional
public void batchInsertData(List<DataEntity> dataList) {
// 分批插入数据
int batchSize = 1000; // 每批插入的数量
List<Future<?>> futures = new ArrayList<>();
for (int i = 0; i < dataList.size(); i += batchSize) {
List<DataEntity> subList = dataList.subList(i, Math.min(i + batchSize, dataList.size()));
futures.add(taskExecutor.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
baseMapper.insertBatchSomeColumn(subList);
return null;
}
}));
}
for (Future<?> future : futures) {
try {
future.get();
} catch (InterruptedException | ExecutionException e) {
e.printStackTrace();
}
}
}
}
Executor 配置类
package com.icoderoad.config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
/**
 * Spring configuration that exposes the {@link ThreadPoolTaskExecutor} bean named
 * {@code taskExecutor}, sized from the {@code spring.task.executor.*} properties.
 */
@Configuration
public class ExecutorConfig {

    /** Bound to {@code spring.task.executor.core-pool-size}. */
    @Value("${spring.task.executor.core-pool-size}")
    private int corePoolSize;

    /** Bound to {@code spring.task.executor.max-pool-size}. */
    @Value("${spring.task.executor.max-pool-size}")
    private int maxPoolSize;

    /** Bound to {@code spring.task.executor.queue-capacity}. */
    @Value("${spring.task.executor.queue-capacity}")
    private int queueCapacity;

    /**
     * Creates the executor and applies the three configured sizing values.
     *
     * @return the configured executor
     */
    @Bean
    public ThreadPoolTaskExecutor taskExecutor() {
        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setQueueCapacity(queueCapacity);
        pool.setCorePoolSize(corePoolSize);
        pool.setMaxPoolSize(maxPoolSize);
        return pool;
    }
}
控制器类
package com.icoderoad.controller;
import com.icoderoad.entity.DataEntity;
import com.icoderoad.service.DataService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * REST entry point for the batch-insert demo.
 */
@RestController
public class DataController {

    @Autowired
    private DataService dataService;

    /**
     * Handles {@code POST /batchInsert}: passes the request body to
     * {@link DataService#batchInsertData(List)} and returns a fixed confirmation string.
     *
     * @param dataEntities entities deserialized from the request body
     * @return the confirmation message
     */
    @PostMapping("/batchInsert")
    public String batchInsert(@RequestBody List<DataEntity> dataEntities) {
        dataService.batchInsertData(dataEntities);
        return "Batch insertion successful";
    }

    /**
     * Builds one million sample entities in memory.
     *
     * <p>NOTE(review): the generated list is never sent or stored, so this only
     * illustrates how test data could be produced. Confirm the intended use.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final int sampleCount = 1000000;
        List<DataEntity> samples = new ArrayList<>();
        Random rng = new Random();
        int index = 0;
        while (index < sampleCount) {
            DataEntity sample = new DataEntity();
            sample.setName("Name " + index);
            sample.setDescription("Description " + rng.nextInt());
            samples.add(sample);
            index++;
        }
    }
}
总结
通过以上的优化和完善,我们充分利用了 Spring Boot 的强大功能、MyBatis-Plus 的便捷操作以及 ThreadPoolTaskExecutor 的高效并发处理能力,成功实现了百万级数据的批量插入。在实际应用中,还可以根据具体的业务需求和性能要求,对代码进行进一步的优化和调整。