增加声音克隆oss批量
parent
9e7d3137e6
commit
cfb38ccf8b
30
README.md
30
README.md
|
@ -1 +1,29 @@
|
|||
pm2 start --name admin-banban-new-nest npm -- run start:prod
|
||||
pm2 start --name admin-banban-new-nest npm -- run start:prod
|
||||
|
||||
// {
|
||||
// roleId: '6704bd0ef48326fe51ddb751',
|
||||
// roleName: '甘宁',
|
||||
// url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E4%BB%A3%E5%8F%B7%E9%B8%A2%E5%AF%86%E6%8E%A2%E7%94%98%E5%AE%81.mp3',
|
||||
// taskId: '674d85d7467ce26d974f97e9'
|
||||
// }
|
||||
// 开始处理任务 [第1次尝试]: 甘宁
|
||||
// {
|
||||
// SpeakerID: 'S*FC60x0Gb1',
|
||||
// InstanceNO: 'Model_storage_BpVrslAkhpaKVod*',
|
||||
// IsActivatable: false,
|
||||
// State: 'Unknown',
|
||||
// DemoAudio: null,
|
||||
// Version: null,
|
||||
// CreateTime: 0,
|
||||
// ExpireTime: 1764691199000,
|
||||
// Alias: '',
|
||||
// AvailableTrainingTimes: 10
|
||||
// }
|
||||
// S_FC60x0Gb1
|
||||
|
||||
开始克隆声音,参数: { speaker_id: 'S_FC60x0Gb1', appid: '8167092294', audio_format: 'wav' }
|
||||
|
||||
<!-- BaseResp: {
|
||||
StatusCode: 1114,
|
||||
StatusMessage: 'snr check failed, snr: 0.33, threshold: 5.00'
|
||||
}, -->
|
||||
|
|
10
package.json
10
package.json
|
@ -9,6 +9,7 @@
|
|||
"build": "nest build",
|
||||
"format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
|
||||
"start": "nest start",
|
||||
"dev": "nest start",
|
||||
"start:dev": "nest start --watch",
|
||||
"start:debug": "nest start --debug --watch",
|
||||
"start:prod": "node dist/main",
|
||||
|
@ -21,15 +22,22 @@
|
|||
"gen": "prisma generate"
|
||||
},
|
||||
"dependencies": {
|
||||
"@nestjs/bull": "^10.2.3",
|
||||
"@nestjs/common": "^10.0.0",
|
||||
"@nestjs/core": "^10.0.0",
|
||||
"@nestjs/platform-express": "^10.0.0",
|
||||
"@prisma/client": "^5.19.1",
|
||||
"@volcengine/openapi": "^1.25.0",
|
||||
"ali-oss": "^6.21.0",
|
||||
"axios": "^1.7.8",
|
||||
"bull": "^4.16.4",
|
||||
"class-validator": "^0.14.1",
|
||||
"ioredis": "^5.4.1",
|
||||
"lodash": "^4.17.21",
|
||||
"prisma": "^5.19.1",
|
||||
"reflect-metadata": "^0.2.0",
|
||||
"rxjs": "^7.8.1"
|
||||
"rxjs": "^7.8.1",
|
||||
"uuid": "^11.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@nestjs/cli": "^10.0.0",
|
||||
|
|
779
pnpm-lock.yaml
779
pnpm-lock.yaml
File diff suppressed because it is too large
Load Diff
|
@ -26,4 +26,35 @@ model SystemCharter {
|
|||
bg String?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
// 音色id
|
||||
voiceId String?
|
||||
// 音色名称
|
||||
voiceName String?
|
||||
|
||||
// 原始音频
|
||||
originAudioUrl String?
|
||||
// 克隆音频
|
||||
cloneAfterAudioUrl String?
|
||||
// 是否激活
|
||||
activate Boolean @default(false)
|
||||
}
|
||||
|
||||
// Task queue: one row per background task handed to the Redis/Bull queue.
|
||||
model TaskQueue {
|
||||
id String @id @default(auto()) @map("_id") @db.ObjectId
|
||||
// Task type
|
||||
type String
|
||||
// Task payload
|
||||
data String
|
||||
// Task status: pending/processing/completed/failed
|
||||
status String
|
||||
// Failure reason
|
||||
error String?
|
||||
// Retry count
|
||||
attempts Int @default(0)
|
||||
// Maximum retry count
|
||||
maxAttempts Int @default(3)
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
|
|
@ -3,11 +3,11 @@ import { AppController } from './app.controller';
|
|||
import { AppService } from './app.service';
|
||||
import { SystemCharterModule } from './modules/index';
|
||||
import { DBModule } from './utils/db/DB.module';
|
||||
|
||||
import { RedisTask } from './common/RedisTask/redisTask.module';
|
||||
|
||||
@Module({
|
||||
imports: [DBModule, SystemCharterModule],
|
||||
imports: [DBModule, SystemCharterModule, RedisTask],
|
||||
controllers: [AppController],
|
||||
providers: [AppService],
|
||||
})
|
||||
export class AppModule { }
|
||||
export class AppModule {}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
import { Global, Module } from '@nestjs/common';
|
||||
import { BullModule } from '@nestjs/bull';
|
||||
import { RedisTaskProcessor } from './RedisTaskProcessor.service';
|
||||
import { RedisTaskService } from './RedisTask.service';
|
||||
|
||||
/**
 * Global module that configures Bull and registers the two queues used for
 * background work: `task_queue` (rate limited to one job per second) and
 * `dead_letter_queue`. Exports `RedisTaskService` so any module can enqueue.
 *
 * Connection settings are read from REDIS_HOST / REDIS_PORT / REDIS_PASSWORD
 * and fall back to the previously hard-coded values when a variable is unset.
 * NOTE(review): the fallback password is still committed to source control —
 * rotate it and drop the literals once every deployment sets the variables.
 */
@Global()
@Module({
  imports: [
    BullModule.forRoot({
      redis: {
        host: process.env.REDIS_HOST ?? '47.101.147.173',
        // `|| 6379` also covers an unset or non-numeric REDIS_PORT (NaN).
        port: Number(process.env.REDIS_PORT) || 6379,
        password: process.env.REDIS_PASSWORD ?? 'SAe7HmZhkF8uev',
      },
      defaultJobOptions: {
        removeOnComplete: true,
        attempts: 3,
      },
    }),
    BullModule.registerQueue({
      name: 'task_queue',
      limiter: {
        max: 1,
        duration: 1000,
      },
    }),
    BullModule.registerQueue({
      name: 'dead_letter_queue',
    }),
  ],
  providers: [RedisTaskService, RedisTaskProcessor],
  exports: [RedisTaskService],
})
export class RedisTask {}
|
|
@ -0,0 +1,94 @@
|
|||
import { InjectQueue } from '@nestjs/bull';
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Queue } from 'bull';
|
||||
import { DBService } from 'src/utils/db/DB.service';
|
||||
|
||||
/**
 * Persists background tasks in the `taskQueue` collection and mirrors them
 * onto the Bull `task_queue`, so task state survives independently of Redis.
 */
@Injectable()
export class RedisTaskService {
  /** Job name (and stored task type) used by every enqueue call. */
  private static readonly JOB_NAME = 'BATCH_CLONE_AUDIO';

  /** Bull options applied to every job added by this service. */
  private static readonly JOB_OPTIONS = { removeOnComplete: true, attempts: 3 };

  constructor(
    @InjectQueue('task_queue') private readonly taskQueue: Queue,
    private readonly dbService: DBService,
  ) {}

  /**
   * Records a task in the database, then adds it to the Redis queue.
   *
   * @param data Job payload; it is stored as JSON and spread into the job data
   *             together with the new record's id as `taskId`.
   * @throws Re-throws the queue error after marking the record `failed`.
   */
  async addTask(data: any) {
    const taskRecord = await this.dbService.taskQueue.create({
      data: {
        type: RedisTaskService.JOB_NAME,
        data: JSON.stringify(data),
        status: 'pending',
      },
    });

    await this.enqueue(data, taskRecord.id);
  }

  /**
   * Lists task records, newest first.
   *
   * @param status Optional status filter; every task is returned when omitted.
   */
  async getTasks(status?: string) {
    const where = status ? { status } : {};
    return await this.dbService.taskQueue.findMany({
      where,
      orderBy: {
        createdAt: 'desc',
      },
    });
  }

  /**
   * Updates a task's status and, optionally, its failure reason.
   *
   * `attempts` is incremented only when a run starts (`processing`). Counting
   * every status change would record two attempts for one successful run.
   *
   * @param taskId Id of the task record.
   * @param status New status value.
   * @param error  Failure reason; the stored value is left untouched when omitted.
   */
  async updateTaskStatus(taskId: string, status: string, error?: string) {
    const attemptBump = status === 'processing' ? { attempts: { increment: 1 } } : {};
    return await this.dbService.taskQueue.update({
      where: { id: taskId },
      data: {
        status,
        error,
        ...attemptBump,
      },
    });
  }

  /**
   * Re-queues a task that is currently `failed`.
   *
   * @param taskId Id of the task record.
   * @returns `true` when the task was re-queued, `false` when it does not
   *          exist or is not in the `failed` state.
   */
  async retryTask(taskId: string) {
    const task = await this.dbService.taskQueue.findUnique({
      where: { id: taskId },
    });

    if (!task || task.status !== 'failed') {
      return false;
    }

    // Parse before touching the record so a corrupt payload cannot leave the
    // task stranded in `pending`.
    const payload = JSON.parse(task.data);

    // Reset the task state.
    await this.dbService.taskQueue.update({
      where: { id: taskId },
      data: {
        status: 'pending',
        attempts: 0,
        error: null,
      },
    });

    // Put it back on the queue.
    await this.enqueue(payload, task.id);
    return true;
  }

  /**
   * Adds the job to Bull; if that fails the record is marked `failed` so it
   * is not left in `pending` forever with no job behind it.
   */
  private async enqueue(payload: any, taskId: string) {
    try {
      await this.taskQueue.add(
        RedisTaskService.JOB_NAME,
        { ...payload, taskId },
        RedisTaskService.JOB_OPTIONS,
      );
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      await this.dbService.taskQueue.update({
        where: { id: taskId },
        data: { status: 'failed', error: reason },
      });
      throw error;
    }
  }
}
|
|
@ -0,0 +1,69 @@
|
|||
import { Process, Processor } from '@nestjs/bull';
|
||||
import { Job } from 'bull';
|
||||
import { RedisTaskService } from './RedisTask.service';
|
||||
import { VolcenAudioSpeakService } from 'src/services/VolcenAudioSpeakService';
|
||||
import { CloneSpeakDto } from 'src/modules/SystemCharter/dto/CloneSpeakDto.dto';
|
||||
import { DBService } from 'src/utils/db/DB.service';
|
||||
|
||||
/**
 * Bull consumer for `task_queue`: clones a voice from an audio file, renders
 * a sample sentence with the cloned voice and stores the result on the
 * matching SystemCharter record.
 */
@Processor('task_queue')
export class RedisTaskProcessor {
  /** Pause between submitting the training audio and synthesising the sample. */
  private static readonly CLONE_SETTLE_MS = 1000 * 5;

  constructor(
    private readonly redisTaskService: RedisTaskService,
    private readonly dbService: DBService,
  ) {}

  /**
   * Handles one `BATCH_CLONE_AUDIO` job.
   *
   * Every step is awaited inside the handler, so the job only finishes once
   * the work is done and any failure reaches the catch block. The error is
   * re-thrown after the task is marked `failed`; Bull only retries a job
   * (up to its configured `attempts`) when the handler rejects.
   *
   * @param job Bull job whose data carries the CloneSpeakDto fields.
   * @throws Re-throws whatever made the clone, synthesis or database update fail.
   */
  @Process('BATCH_CLONE_AUDIO')
  async handleSpecificTask(job: Job) {
    const charterInfo = job.data as CloneSpeakDto;
    const { taskId, url: ossUrl } = charterInfo;

    // NOTE(review): the speaker slot is hard-coded; the dynamic lookup via
    // getVoiceId() was disabled by the author — confirm before batch use.
    // voiceId = await volcenAudioSpeakService.getVoiceId();
    const voiceId = 'S_VK2Yw0Gb1';

    try {
      if (taskId) {
        await this.redisTaskService.updateTaskStatus(taskId, 'processing');
      }
      console.log(job.data);
      console.log(`开始处理任务 [第${job.attemptsMade + 1}次尝试]: ${job.data.roleName}`);

      const volcenAudioSpeakService = new VolcenAudioSpeakService();

      // Download the OSS file and encode it as base64.
      const base64 = await volcenAudioSpeakService.getAudioBase64(ossUrl);

      // Submit the training audio; awaited so a rejected upload fails the job.
      await volcenAudioSpeakService.speakClone(voiceId, base64);

      // Wait five seconds before using the cloned voice.
      await new Promise<void>((resolve) => setTimeout(resolve, RedisTaskProcessor.CLONE_SETTLE_MS));

      if (taskId) {
        // Activation is left disabled on purpose: an activated voice can no
        // longer be re-cloned, so activate only after the clone is approved.
        // await volcenAudioSpeakService.speakActivate(voiceId);
        const textToSpeechResult = await volcenAudioSpeakService.textToSpeech(
          '今天有什么新鲜事吗?快给我讲讲',
          voiceId,
        );

        // Store the voice details on the system character.
        await this.dbService.systemCharter.update({
          where: { id: charterInfo.roleId },
          data: {
            voiceId,
            voiceName: charterInfo.roleName,
            activate: true,
            originAudioUrl: ossUrl,
            cloneAfterAudioUrl: textToSpeechResult.url,
          },
        });

        // Mark the task completed only after everything above succeeded.
        await this.redisTaskService.updateTaskStatus(taskId, 'completed');
      }
      console.log(`任务处理完成: ${job.data.roleName}`);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.log((error as { data?: unknown } | null | undefined)?.data);
      console.log('voiceId=--------------', voiceId);
      console.error(`任务处理失败 [第${job.attemptsMade + 1}次尝试]: ${message}`);
      if (taskId) {
        await this.redisTaskService.updateTaskStatus(taskId, 'failed', message);
      }
      // Reject so Bull records the failure and applies its retry policy.
      throw error;
    }
  }
}
|
|
@ -3,11 +3,16 @@ import { SystemCharterlDto } from './dto/SystemCharter.dto';
|
|||
import { Pagination } from 'src/common/pagination';
|
||||
import { DBService } from 'src/utils/db/DB.service';
|
||||
import { ApiResponse } from 'src/utils/response/response';
|
||||
import { RedisTaskService } from 'src/common/RedisTask/RedisTask.service';
|
||||
import { CloneSpeakDto } from './dto/CloneSpeakDto.dto';
|
||||
import axios from 'axios';
|
||||
|
||||
@Controller('/system/charter')
|
||||
export class SystemCharterController {
|
||||
constructor(private readonly dbService: DBService) { }
|
||||
|
||||
constructor(
|
||||
private readonly dbService: DBService,
|
||||
private readonly redisTaskService: RedisTaskService,
|
||||
) {}
|
||||
|
||||
@Get('/getList')
|
||||
async getList(@Query() query: SystemCharterlDto) {
|
||||
|
@ -52,4 +57,53 @@ export class SystemCharterController {
|
|||
});
|
||||
return newRecord;
|
||||
}
|
||||
|
||||
/**
 * GET /system/charter/cloneSpeaker — intended to validate a clone request and
 * queue it as a background task.
 *
 * NOTE(review): the entire body is commented out, so this handler currently
 * does nothing and resolves to undefined. The disabled code also calls
 * `addTask` without awaiting it, so a queue failure would escape the
 * surrounding try/catch if it were re-enabled as written.
 * NOTE(review): the route is a GET that reads `@Body()` — confirm clients
 * actually send a body with GET before re-enabling.
 *
 * @param object Clone request (roleId, roleName, url).
 */
@Get('cloneSpeaker')
|
||||
async batchCloneAudio(@Body() object: CloneSpeakDto) {
|
||||
// try {
|
||||
// if (!object.roleId || !object.roleName || !object.url) {
|
||||
// return ApiResponse.failToMessage('参数错误');
|
||||
// }
|
||||
// // Verify that the audio URL is reachable
|
||||
// const response = await axios.head(object.url);
|
||||
// if (response.status !== 200) {
|
||||
// return ApiResponse.failToMessage('音频文件无法访问');
|
||||
// }
|
||||
// // Verify the file type
|
||||
// const contentType = response.headers['content-type'];
|
||||
// if (!contentType.includes('audio')) {
|
||||
// return ApiResponse.failToMessage('文件类型必须是音频');
|
||||
// }
|
||||
// this.redisTaskService.addTask(object);
|
||||
// return ApiResponse.success(null, '任务添加成功');
|
||||
// } catch (error) {
|
||||
// console.error('添加克隆任务失败:', error);
|
||||
// return ApiResponse.failToMessage('添加任务失败:' + error.message);
|
||||
// }
|
||||
}
|
||||
|
||||
/**
 * GET /system/charter/test — development helper that queues a fixed list of
 * clone tasks.
 *
 * The enqueue calls are awaited together, so the request only returns once
 * every task is stored and queued, and a failure surfaces as a request error
 * instead of an unhandled promise rejection.
 */
@Get('test')
async batchCloneAudio1() {
  const result = [
    {
      roleId: '6704bd0ef48326fe51ddb751',
      roleName: '甘宁',
      // Alternative sample files kept for manual testing:
      // url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E4%BB%A3%E5%8F%B7%E9%B8%A2%E5%AF%86%E6%8E%A2%E5%91%A8%E7%91%9C.mp3',
      // url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E4%BB%A3%E5%8F%B7%E9%B8%A2%E5%AF%86%E6%8E%A2%E7%94%98%E5%AE%81.mp3',
      // url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E4%BB%A3%E5%8F%B7%E9%B8%A2%E5%AF%86%E6%8E%A2%E7%94%98%E5%AE%81%20-%20%E5%89%AF%E6%9C%AC.wav',
      // Guo Degang
      // url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E9%83%AD%E5%BE%B7%E7%BA%B2-%E5%A3%B0%E9%9F%B3%E5%85%8B%E9%9A%86.mp3',
      // After gain increase
      url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/WeChat_20241119150807_1.mp3',

      // Zhou Yu
      // url: 'https://banban-systemcharter-speak.os-cn-beijing.aliyuncs.com/test/%E4%BB%A3%E5%8F%B7%E9%B8%A2%E5%AF%86%E6%8E%A2%E5%91%A8%E7%91%9C.mp3',
      // Zhou Yu, after raising the volume
      // url: 'https://banban-systemcharter-speak.oss-cn-beijing.aliyuncs.com/test/%E5%91%A8%E7%91%9C.mp3',
    },
  ];

  // forEach would drop the promises returned by addTask; wait for all of them.
  await Promise.all(result.map((item) => this.redisTaskService.addTask(item)));
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,21 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { BullModule } from '@nestjs/bull';
|
||||
import { SystemCharterController } from './SystemCharter.controller';
|
||||
import { SystemCharterService } from './SystemCharter.servicer';
|
||||
import { RedisTaskService } from 'src/common/RedisTask/RedisTask.service';
|
||||
|
||||
@Module({
|
||||
imports: [],
|
||||
imports: [
|
||||
BullModule.registerQueue(
|
||||
{
|
||||
name: 'task_queue',
|
||||
},
|
||||
{
|
||||
name: 'dead_letter_queue',
|
||||
},
|
||||
),
|
||||
],
|
||||
controllers: [SystemCharterController],
|
||||
providers: [SystemCharterService],
|
||||
providers: [SystemCharterService, RedisTaskService],
|
||||
})
|
||||
export class SystemCharterModule {}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
import { IsOptional, IsString } from 'class-validator';
|
||||
|
||||
/**
 * Payload describing one voice-clone request.
 */
export class CloneSpeakDto {
  /** Task id (optional). */
  @IsString()
  @IsOptional()
  taskId?: string;

  /** Role id. */
  @IsString()
  roleId: string;

  /** Role name. */
  @IsString()
  roleName: string;

  /** URL of the audio file. */
  @IsString()
  url: string;
}
|
|
@ -0,0 +1,63 @@
|
|||
import 'dotenv/config';
|
||||
import { v4 as uuid } from 'uuid';
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Builds a text-to-speech function backed by the Volcano TTS HTTP API.
 *
 * @param isCustom        When true the request targets the `volcano_mega`
 *                        cluster (custom voices) instead of `volcano_tts`.
 * @param backupVoiceName Voice used for one retry on the standard cluster
 *                        when a custom-voice request fails.
 * @returns An async function `(text, onComplete, voiceName)` that synthesises
 *          `text`, passes the decoded audio to `onComplete` and resolves to
 *          `true`. It rejects with the request error when there is no usable
 *          fallback, or with the fallback's error when that fails as well.
 */
export const TTSTiktokService =
  (isCustom = false, backupVoiceName?: string) =>
  async (text: string, onComplete: (audioData: Buffer) => Promise<any>, voiceName: string) => {
    const body = {
      app: {
        appid: process.env.TiktokAppId,
        token: 'default_token',
        // cluster: "volcano_tts",
        cluster: isCustom ? 'volcano_mega' : 'volcano_tts',
      },
      user: {
        uid: 'volcano_user',
      },
      audio: {
        voice_type: voiceName,
        encoding: 'pcm',
        emotion: 'happy',
        language: 'cn',
        rate: 16000,
      },
      request: {
        reqid: uuid(),
        text: text,
        text_type: 'plain',
        operation: 'query',
        pure_english_opt: '1',
      },
    };

    try {
      const res = await axios.post(`https://openspeech.bytedance.com/api/v1/tts`, body, {
        headers: {
          Authorization: `Bearer;${process.env.TiktokAccessToken}`,
          'Content-Type': 'application/json',
        },
      });
      // The response carries the audio as a base64 string.
      const buffer = Buffer.from(res.data.data, 'base64');
      await onComplete(buffer);
      return true;
    } catch (e) {
      console.log(e);
      // If the custom voice fails (for example the concurrency limit was
      // exceeded), retry once with the backup voice on the standard cluster.
      // Returning here keeps a successful fallback from also reaching the
      // failure path below.
      if (isCustom && backupVoiceName !== undefined) {
        await TTSTiktokService(false)(text, onComplete, backupVoiceName);
        return true;
      }
      throw e;
    }
  };
|
|
@ -0,0 +1,242 @@
|
|||
import axios from 'axios';
|
||||
import { Signer } from '@volcengine/openapi';
|
||||
import { pcmToWavArrayBuffer } from 'src/utils/pcmToWav';
|
||||
import AliOss, { OSSRegionType } from 'src/utils/aliOss';
|
||||
import { v4 as uuid } from 'uuid';
|
||||
|
||||
/** Status envelope returned by the voice-clone upload API. */
export type ReturnBaseType = {
  BaseResp: {
    StatusCode: number;
    StatusMessage: string;
  };
};

/**
 * Volcano Engine speech service: voice cloning, activation, speaker lookup
 * and text-to-speech.
 */
export class VolcenAudioSpeakService {
  // Volcano Engine base API address
  static ENGIN_BASE_URL_API = 'https://open.volcengineapi.com/';
  // Voice training (audio upload)
  static SPEAK_CLONE_API = 'https://openspeech.bytedance.com/api/v1/mega_tts/audio/upload';
  // TTS API
  static TTS_API = 'https://openspeech.bytedance.com/api/v1/tts';

  // NOTE(review): these credentials are committed to source control; move
  // them to configuration and rotate them.
  static TiktokAccessKeyId = 'AKLTMWNlY2Q1YzZlMTUwNDY2N2E3NTAyMmIyOWNjNjlhYWM';
  static TiktokAccessToken = '6UwsXXQWnVBhYKgYjp5DoPedwU3XDecn';
  static TiktokSecretKeyId = 'TlRVd05UVXlOalkzT0RFMk5EVTFORGsyWXpjNU5UZzBOak5pT0RKbU9EUQ==';
  static TiktokAppId = '8167092294';

  /**
   * @description Text to speech: synthesises `text`, wraps the returned PCM
   * in a WAV header and uploads the file to OSS.
   * @param text Text content
   * @param speakerId Speaker id
   * @returns The public URL of the uploaded WAV file and the speaker id.
   * @throws Error when the response carries no audio payload.
   */
  public async textToSpeech(text: string, speakerId: string) {
    const randomUUID = uuid();
    const ttsBody = {
      app: {
        appid: VolcenAudioSpeakService.TiktokAppId,
        token: 'default_token',
        cluster: 'volcano_mega',
      },
      user: {
        uid: 'volcano_user',
      },
      audio: {
        voice_type: speakerId,
        encoding: 'pcm',
        emotion: 'happy',
        language: 'cn',
        rate: 16000,
      },
      request: {
        reqid: randomUUID,
        text: text,
        text_type: 'plain',
        operation: 'query',
        pure_english_opt: '1',
      },
    };

    const volenResult = await axios.post(VolcenAudioSpeakService.TTS_API, ttsBody, {
      headers: {
        'Resource-Id': 'volc.megatts.tts',
        Authorization: `Bearer;${VolcenAudioSpeakService.TiktokAccessToken}`,
      },
    });

    // Fail with a readable message instead of a TypeError from Buffer.from
    // when the API answers without audio data.
    const encodedAudio = volenResult.data?.data;
    if (typeof encodedAudio !== 'string') {
      throw new Error(`文本转语音失败: ${JSON.stringify(volenResult.data)}`);
    }

    const buffer = Buffer.from(encodedAudio, 'base64');
    const fileName = `${speakerId}_${randomUUID}.wav`;
    await AliOss('stayby-static', OSSRegionType.SH).put(
      `custom-training-voice-example/${fileName}`,
      Buffer.from(pcmToWavArrayBuffer(buffer)),
    );
    const ossUrl = `https://stayby-static.oss-cn-shanghai.aliyuncs.com/custom-training-voice-example/${fileName}`;
    return {
      url: ossUrl,
      speakerId,
    };
  }

  /**
   * @description Train (clone) a voice by uploading one audio sample.
   * @param speakerId Speaker slot to train
   * @param base64 Audio as a base64 string, or a Buffer of audio bytes
   *               (presumably raw, un-encoded bytes — it is base64-encoded
   *               here before sending)
   * @param audioFormat Value sent as `audio_format`. When omitted it is
   *               detected from the audio's leading bytes, falling back to
   *               `'wav'` (the previous fixed value).
   * @returns The API response body.
   * @throws Error when the API reports a non-zero business status code; the
   *         axios error when the request itself fails.
   */
  public async speakClone<T = ReturnBaseType & { speaker_id: string }>(
    speakerId: string,
    base64: string | Buffer,
    audioFormat?: string,
  ): Promise<T> {
    // A Buffer placed in a JSON body is serialised as { type, data: [...] },
    // not as base64 text, so encode it explicitly.
    const audioBytes = typeof base64 === 'string' ? base64 : base64.toString('base64');

    // 16 base64 characters decode to the first 12 bytes — enough for the
    // signatures checked by detectAudioFormat.
    const leadingBytes = Buffer.from(audioBytes.slice(0, 16), 'base64');
    const format = audioFormat ?? VolcenAudioSpeakService.detectAudioFormat(leadingBytes) ?? 'wav';

    const tiktokBody = {
      speaker_id: speakerId,
      appid: VolcenAudioSpeakService.TiktokAppId,
      audios: [
        {
          audio_bytes: audioBytes,
          audio_format: format,
        },
      ],
      source: 2,
    };

    console.log('开始克隆声音,参数:', {
      speaker_id: speakerId,
      appid: VolcenAudioSpeakService.TiktokAppId,
      audio_format: format,
    });

    const result = await axios.post(VolcenAudioSpeakService.SPEAK_CLONE_API, tiktokBody, {
      headers: {
        'Resource-Id': 'volc.megatts.voiceclone',
        Authorization: `Bearer;${VolcenAudioSpeakService.TiktokAccessToken}`,
        'Content-Type': 'application/json',
      },
    });

    // Check the business status code.
    if (result.data?.BaseResp?.StatusCode !== 0) {
      console.error('克隆声音业务错误:', result.data);
      throw new Error(`训练声音业务错误: ${result.data?.BaseResp?.StatusMessage || '未知错误'}`);
    }
    return result.data as T;
  }

  /**
   * @description Activate a voice.
   * @param voiceId Speaker id to activate
   * @returns The status entry for `voiceId` from the response, or undefined
   *          when the response does not list it.
   */
  public async speakActivate(voiceId: string) {
    const activatePost = {
      region: 'cn-north-1',
      method: 'POST',
      params: {
        Action: 'ActivateMegaTTSTrainStatus',
        Version: '2023-11-07',
      },
      headers: {
        'Content-Type': 'application/json',
      },
      body: {
        AppId: VolcenAudioSpeakService.TiktokAppId,
        SpeakerIDs: [voiceId],
      },
    };
    const signer = new Signer(activatePost, 'speech_saas_prod');

    signer.addAuthorization({
      accessKeyId: VolcenAudioSpeakService.TiktokAccessKeyId,
      secretKey: VolcenAudioSpeakService.TiktokSecretKeyId,
    });

    // Activate
    const activateVoiceResult = await axios.post(
      VolcenAudioSpeakService.ENGIN_BASE_URL_API + '?Action=ActivateMegaTTSTrainStatus&Version=2023-11-07',
      activatePost.body,
      {
        headers: activatePost.headers,
      },
    );

    return activateVoiceResult.data?.Result?.Statuses?.find((item: any) => item.SpeakerID === voiceId);
  }

  /**
   * Get a speaker id: returns the first listed speaker whose State is 'Unknown'.
   *
   * @throws Error when no speaker in the response has State 'Unknown'.
   */
  public async getVoiceId() {
    const query = {
      region: 'cn-north-1',
      method: 'POST',
      params: {
        Action: 'ListMegaTTSTrainStatus',
        Version: '2023-11-07',
      },
      headers: {
        'Content-Type': 'application/json; charset=utf-8',
      },
      body: {
        AppId: VolcenAudioSpeakService.TiktokAppId,
      },
    };

    const signer = new Signer(query, 'speech_saas_prod');
    signer.addAuthorization({
      accessKeyId: VolcenAudioSpeakService.TiktokAccessKeyId,
      secretKey: VolcenAudioSpeakService.TiktokSecretKeyId,
    });
    const megaListResult = await axios.post(
      VolcenAudioSpeakService.ENGIN_BASE_URL_API + `?Action=ListMegaTTSTrainStatus&Version=2023-11-07`,
      query.body,
      {
        headers: query.headers,
      },
    );

    // Pick a speaker that has not been trained/activated yet.
    const unknownStatusItem = megaListResult.data?.Result?.Statuses?.find(
      (item: any) => item.State === 'Unknown',
    );
    if (!unknownStatusItem) {
      throw new Error('获取音色失败: 没有 State 为 Unknown 的音色');
    }
    return unknownStatusItem.SpeakerID;
  }

  /**
   * @description Download an audio file and return it as base64.
   * @param ossUrl OSS file URL
   * @throws Error wrapping the download or conversion failure.
   */
  public async getAudioBase64(ossUrl: string): Promise<string> {
    try {
      const response = await axios.get(ossUrl, {
        responseType: 'arraybuffer',
      });

      const audioBuffer = Buffer.from(response.data);
      const wavBuffer = await this.convertToRequiredFormat(audioBuffer);
      return wavBuffer.toString('base64');
    } catch (error) {
      console.error('获取音频base64失败:', error);
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`获取音频base64失败: ${reason}`);
    }
  }

  /**
   * Prepares downloaded audio for upload.
   *
   * Data that already carries a recognised container or stream signature
   * (WAV, MP3, Ogg, M4A, AAC) is returned unchanged: prepending a PCM WAV
   * header to an encoded file makes a decoder read the compressed bytes as
   * samples. Anything else is treated as headerless PCM and wrapped by
   * pcmToWavArrayBuffer. No resampling or transcoding happens here.
   */
  private async convertToRequiredFormat(audioBuffer: Buffer): Promise<Buffer> {
    if (VolcenAudioSpeakService.detectAudioFormat(audioBuffer) !== undefined) {
      return audioBuffer;
    }
    const wavArrayBuffer = pcmToWavArrayBuffer(audioBuffer);
    return Buffer.from(wavArrayBuffer);
  }

  /**
   * Identifies an audio format from its leading bytes.
   *
   * @returns 'wav' | 'mp3' | 'ogg' | 'm4a' | 'aac', or undefined when no
   *          known signature matches (e.g. headerless PCM).
   */
  private static detectAudioFormat(audio: Buffer): string | undefined {
    if (audio.length < 4) {
      return undefined;
    }
    const tag = audio.toString('latin1', 0, 4);
    if (tag === 'RIFF' && audio.length >= 12 && audio.toString('latin1', 8, 12) === 'WAVE') {
      return 'wav';
    }
    if (tag === 'OggS') {
      return 'ogg';
    }
    if (tag.startsWith('ID3')) {
      return 'mp3';
    }
    if (audio.length >= 8 && audio.toString('latin1', 4, 8) === 'ftyp') {
      return 'm4a';
    }

    // Frame-sync based formats: 0xFF followed by at least three set bits.
    const second = audio.readUInt8(1);
    if (audio.readUInt8(0) === 0xff && (second & 0xe0) === 0xe0) {
      const layer = (second >> 1) & 0x03;
      if ((second & 0xf0) === 0xf0 && layer === 0) {
        return 'aac'; // ADTS: 12-bit sync with layer bits 00
      }
      // MPEG Layer III; also require a valid bitrate / sample-rate index so
      // PCM that happens to start with 0xFF is less likely to match.
      const third = audio.readUInt8(2);
      const bitrateIndex = third >> 4;
      const sampleRateIndex = (third >> 2) & 0x03;
      if (layer === 1 && bitrateIndex !== 0 && bitrateIndex !== 0x0f && sampleRateIndex !== 0x03) {
        return 'mp3';
      }
    }
    return undefined;
  }
}
|
|
@ -0,0 +1,20 @@
|
|||
import * as aliOss from 'ali-oss';
|
||||
|
||||
/** Bucket used when the caller does not name one. */
const defaultBucket = 'stayby-static-fast';

/** OSS region identifiers accepted by the client factory. */
export enum OSSRegionType {
  BJ = 'oss-cn-beijing',
  SH = 'oss-cn-shanghai',
}

/**
 * Creates an ali-oss client bound to one bucket and region.
 *
 * Credentials are read from ALI_OSS_ACCESS_KEY_ID / ALI_OSS_ACCESS_KEY_SECRET
 * and fall back to the previously hard-coded values when a variable is unset.
 * NOTE(review): the fallback keys are still committed to source control —
 * rotate them and drop the literals once every deployment sets the variables.
 *
 * @param bucket Bucket name; defaults to `defaultBucket`.
 * @param region OSS region; defaults to Shanghai.
 */
const AliOss = (bucket = defaultBucket, region = OSSRegionType.SH) => {
  return new aliOss({
    region,
    bucket: bucket,
    accessKeyId: process.env.ALI_OSS_ACCESS_KEY_ID ?? 'LTAI5tEday8PJNaMTz5mp8g4',
    accessKeySecret: process.env.ALI_OSS_ACCESS_KEY_SECRET ?? 'ck84eTxx4aSTjornlYrCy8RkurCHfc',
    secure: true,
  });
};
|
||||
|
||||
export default AliOss;
|
|
@ -0,0 +1,74 @@
|
|||
// const pcmToWavArrayBufferbu = (pcmData: ArrayBuffer) => {
|
||||
// // 添加44字节的wav文件头
|
||||
// const pcmHeadString = "52 49 46 46 24 5C 02 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 01 00 80 3e 00 00 00 7d 00 00 02 00 10 00 64 61 74 61 00 5C 02 00";
|
||||
// const pcmHeadArray = pcmHeadString.split(" ");
|
||||
// const pcmHead = new Uint8Array(pcmHeadArray.length);
|
||||
// for (let i = 0; i < pcmHeadArray.length; i++) {
|
||||
// pcmHead[i] = parseInt(pcmHeadArray[i]!, 16);
|
||||
// }
|
||||
// let wavData = new Uint8Array(pcmData.byteLength + pcmHead.byteLength);
|
||||
// wavData.set(pcmHead, 0);
|
||||
// wavData.set(new Uint8Array(pcmData), pcmHead.byteLength);
|
||||
// return wavData.buffer;
|
||||
// }
|
||||
|
||||
/**
 * Wraps raw PCM samples in a 44-byte RIFF/WAVE header.
 *
 * The byte-rate field is derived from the format parameters
 * (sampleRate * numChannels * bitDepth / 8). The previous fixed
 * `sampleRate * 4` described 16-bit stereo and did not match the mono
 * channel count written in the same header.
 *
 * @param pcmData     PCM bytes as an ArrayBuffer or any typed-array view
 *                    (a Node Buffer included); only the viewed range is used.
 * @param sampleRate  Samples per second, default 16000.
 * @param numChannels Channel count, default 1.
 * @param bitDepth    Bits per sample, default 16.
 * @returns A new ArrayBuffer holding the header followed by the PCM bytes.
 */
const pcmToWavArrayBuffer = (
  pcmData: ArrayBuffer | ArrayBufferView,
  sampleRate = 16000,
  numChannels = 1,
  bitDepth = 16,
): ArrayBuffer => {
  const headerLength = 44;
  const pcmBytes = ArrayBuffer.isView(pcmData)
    ? new Uint8Array(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength)
    : new Uint8Array(pcmData);
  const dataLength = pcmBytes.byteLength;
  const blockAlign = numChannels * (bitDepth / 8);

  const wavData = new ArrayBuffer(headerLength + dataLength);
  const view = new DataView(wavData);

  const writeString = (offset: number, str: string) => {
    for (let i = 0; i < str.length; i++) {
      view.setUint8(offset + i, str.charCodeAt(i));
    }
  };

  /* RIFF identifier */
  writeString(0, 'RIFF');
  /* file length minus the 8-byte RIFF preamble */
  view.setUint32(4, 36 + dataLength, true);
  /* RIFF type */
  writeString(8, 'WAVE');
  /* format chunk identifier */
  writeString(12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (1 = linear PCM) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, numChannels, true);
  /* sample rate */
  view.setUint32(24, sampleRate, true);
  /* byte rate (sample rate * block align) */
  view.setUint32(28, sampleRate * blockAlign, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, blockAlign, true);
  /* bits per sample */
  view.setUint16(34, bitDepth, true);
  /* data chunk identifier */
  writeString(36, 'data');
  /* data chunk length */
  view.setUint32(40, dataLength, true);

  // Copy the samples in directly after the header.
  new Uint8Array(wavData).set(pcmBytes, headerLength);
  return wavData;
};
|
||||
|
||||
/**
 * Returns the PCM payload of a WAV buffer by dropping its first 44 bytes.
 * The header is not parsed; everything from byte 44 onward is returned.
 */
const wavToPcmArrayBuffer = (wavData: ArrayBuffer) => wavData.slice(44);
|
||||
|
||||
export { pcmToWavArrayBuffer, wavToPcmArrayBuffer };
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue