Skip to content
Snippets Groups Projects
Commit 1c284048 authored by 家乐 袁's avatar 家乐 袁
Browse files

Merge branch '201250084' into 'master'

更新了数据挖掘部分

See merge request 0010-qingyun/backend!20
parents 3663fd10 9e4002fb
No related branches found
No related tags found
No related merge requests found
Pipeline #63722 passed
No preview for this file type
......@@ -22,7 +22,7 @@
<artifactId>sentistrength_backend</artifactId>
<version>1.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/sentiStrength-v1.0.19.jar</systemPath>
<systemPath>${project.basedir}/lib/sentiStrength-v1.0.20.jar</systemPath>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
......
package com.sentistrength.model.vo.dataMining;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
/**
 * Request object for the information-gain cross-validation with feature selection.
 * Its values are read by {@code DataMiningServiceImpl.classifyAllArff} and handed to
 * {@code WekaCrossValidateInfoGain.classifyArff}.
 *
 * <p>Fields are private; access goes through the Lombok-generated accessors,
 * constructors and builder.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ClassifyAllArffVO {

    /** Name of the input file. */
    private String inFileName;

    /** Lines of the input file. */
    private List<String> inFile;

    /** Name of the classifier. */
    private String classifierName;

    /** Name of the classifier to exclude. */
    private String classifierExclude;

    /** Number of features. */
    private int features;
}
package com.sentistrength.model.vo.dataMining;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
/**
 * Request object for the information-gain cross-validation without feature selection.
 * Its values are read by {@code DataMiningServiceImpl.classifyAllArffWithoutSelection}
 * and handed to {@code WekaCrossValidateNoSelection.classifyAllArff}.
 *
 * <p>Fields are private; access goes through the Lombok-generated accessors,
 * constructors and builder.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ClassifyAllArffWithoutSelectionVO {

    /** Name of the input file. */
    private String inFileName;

    /** The input file. */
    private List<String> inFile;

    /** Name of the classifier. */
    private String classifierName;

    /** Name of the classifier to exclude. */
    private String classifierExclude;
}
package com.sentistrength.model.vo.dataMining;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
/**
 * Request object for training a classifier on a training file and evaluating it on a
 * test file. Its values are read by {@code DataMiningServiceImpl.directClassifyAllArff}
 * and handed to {@code WekaDirectTrainClassifyEvaluate.directClassifyAllArff}.
 *
 * <p>Fields are private; access goes through the Lombok-generated accessors,
 * constructors and builder.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DirectClassifyAllArffVO {

    /** Name of the input training-set file. */
    private String trainFileName;

    /** Lines of the input training-set file. */
    private List<String> trainFile;

    /** Lines of the input test-set file. */
    private List<String> evalFile;

    /** Name of the classifier. */
    private String classifierName;

    /** Name of the classifier to exclude. */
    private String classifierExclude;
}
package com.sentistrength.model.vo.dataMining;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
/**
 * Request object for predicting the classes of an unlabelled file from a training set
 * and a classifier. Its values are read by {@code DataMiningServiceImpl.predictArffClass}
 * and handed to {@code PredictClass.predictArffClass}.
 *
 * <p>Fields are private; access goes through the Lombok-generated accessors,
 * constructors and builder.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class PredictArffClassVO {

    /**
     * Input file used as the training set.
     *
     * <p>NOTE(review): the name is spelled {@code inFle} (not {@code inFile}). It is kept
     * as is because the generated getter {@code getInFle()} is called by the service and
     * the field name determines the generated accessor and builder method names.
     */
    private List<String> inFle;

    /** Name of the classifier. */
    private String classifier;

    /** Input file to be classified. */
    private List<String> unlabelledFile;

    /** Maximum value of the label. */
    private int classFor0;
}
......@@ -10,10 +10,7 @@ import uk.ac.wlv.sentistrength.ClassificationOptions;
import uk.ac.wlv.sentistrength.ClassificationResources;
import uk.ac.wlv.sentistrength.TextParsingOptions;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;
@Service
......@@ -44,9 +41,13 @@ public class DataMiningServiceImpl implements DataMiningService {
public List<String> convertSentimentTextToArff(ConvertSentimentTextToArffVO convertSentimentTextToArffVO) {
    // Converts the sentiment text carried by the VO into ARFF lines.
    // The requested n-gram size is written into the parsing options before converting.
    textParsingOptions.igNgramSize = convertSentimentTextToArffVO.getNgram();
    try {
        // Single, guarded invocation: any failure (including an unknown mode name passed
        // to ClassificationMode.valueOf) is reported as service error DS002.
        return uk.ac.wlv.wkaclass.Arff.convertSentimentTextToArff(
                convertSentimentTextToArffVO.getInFileName(),
                convertSentimentTextToArffVO.getInFile(),
                convertSentimentTextToArffVO.isHeaderLine(),
                textParsingOptions,
                classificationOptions,
                classificationResources,
                map.get(ClassificationMode.valueOf(convertSentimentTextToArffVO.getMode())),
                convertSentimentTextToArffVO.getMinFeatureFrequency(),
                null);
    } catch (Exception e) {
        // NOTE(review): the original cause is dropped here; attach `e` if
        // MyServiceException offers a constructor that accepts a cause.
        throw new MyServiceException("DS002", "将情感文本转换为Arff文件出错");
    }
}
......@@ -58,7 +59,11 @@ public class DataMiningServiceImpl implements DataMiningService {
@Override
public List<String> combineTwoArffs(CombineTwoArffsVO combineTwoArffsVO) {
    // Combines the two ARFF inputs carried by the VO and returns the combined lines.
    try {
        // Single, guarded invocation: any failure is reported as service error DS003.
        return uk.ac.wlv.wkaclass.Arff.combineTwoArffs(
                combineTwoArffsVO.getInFile1(),
                combineTwoArffsVO.getInFile2(),
                false);
    } catch (Exception e) {
        // NOTE(review): the original cause is dropped here; attach `e` if
        // MyServiceException offers a constructor that accepts a cause.
        throw new MyServiceException("DS003", "合并两个Arff文件出错");
    }
}
......@@ -76,7 +81,7 @@ public class DataMiningServiceImpl implements DataMiningService {
}
outFile = uk.ac.wlv.wkaclass.Arff.deleteColumnFromArff(inFile,removeDataColumnVO.getColToRemove());
if (outFile == null){
throw new MyServiceException("DS002", "Arff文件删除列出错");
throw new MyServiceException("DS004", "Arff文件删除列出错");
}
return outFile;
}
......@@ -93,7 +98,11 @@ public class DataMiningServiceImpl implements DataMiningService {
if(inFile.size() == 0){
throw new MyServiceException("DS001", "Arff文件为空");
}
outFile = uk.ac.wlv.wkaclass.Arff.moveColumnToEndOfArff(inFile,moveColumnVO.getColToMove());
try {
outFile = uk.ac.wlv.wkaclass.Arff.moveColumnToEndOfArff(inFile, moveColumnVO.getColToMove());
}catch (Exception e){
throw new MyServiceException("DS005", "将Arff文件某一列移动到最后出错");
}
return outFile;
}
......@@ -109,7 +118,11 @@ public class DataMiningServiceImpl implements DataMiningService {
if(inFile.size() == 0){
throw new MyServiceException("DS001", "Arff文件为空");
}
outFile = uk.ac.wlv.wkaclass.Arff.deleteColAndMoveRemainingFirstColToEnd(inFile,deleteColAndMoveVO.getColToDelete());
try {
    outFile = uk.ac.wlv.wkaclass.Arff.deleteColAndMoveRemainingFirstColToEnd(inFile, deleteColAndMoveVO.getColToDelete());
} catch (Exception e) {
    // Error code follows the DSnnn scheme used by the neighbouring data-mining errors
    // (DS005 for moving a column, DS007 for information gain).
    throw new MyServiceException("DS006", "删除某一列并将第一列移到最后出错");
}
return outFile;
}
......@@ -130,8 +143,12 @@ public class DataMiningServiceImpl implements DataMiningService {
int[][] iData = new int[iAttributeCount + 1][iDataCount + 1];
double[] fColIG = new double[iAttributeCount + 1];
String[] sAttributes = new String[iAttributeCount + 1];
uk.ac.wlv.wkaclass.Arff.readArffAttributesAndData(inFile, iAttributeCount, iDataCount, sAttributes, iData);
uk.ac.wlv.wkaclass.Arff.calculateInformationGainOfData(iData,iAttributeCount,iDataCount,fColIG);
try {
uk.ac.wlv.wkaclass.Arff.readArffAttributesAndData(inFile, iAttributeCount, iDataCount, sAttributes, iData);
uk.ac.wlv.wkaclass.Arff.calculateInformationGainOfData(iData, iAttributeCount, iDataCount, fColIG);
}catch (Exception e){
throw new MyServiceException("DS007", "计算信息增益出错");
}
return Arrays.stream(fColIG).boxed().collect(Collectors.toList());
}
......@@ -147,7 +164,11 @@ public class DataMiningServiceImpl implements DataMiningService {
if(inFile.size() == 0){
throw new MyServiceException("DS001", "Arff文件为空");
}
outFile = uk.ac.wlv.wkaclass.Arff.makeArffWithTopNAttributes(inFile,selectTopNFeaturesVO.getTopN());
try {
outFile = uk.ac.wlv.wkaclass.Arff.makeArffWithTopNAttributes(inFile, selectTopNFeaturesVO.getTopN());
}catch (Exception e) {
throw new MyServiceException("DS008", "用信息增益最高的N个Attributes新建Arff文件出错");
}
return outFile;
}
......@@ -170,9 +191,27 @@ public class DataMiningServiceImpl implements DataMiningService {
@Override
public List<String> mergeLabelledAndUnlabelled(MergeLabelledAndUnlabelledVO mergeLabelledAndUnlabelledVO) {
    // Merges a labelled text file with an unlabelled one and returns the merged lines.
    // Both inputs are passed through handleList first; a missing or empty input is
    // rejected with DS001 before the merge is attempted.
    List<String> rawLabelled = mergeLabelledAndUnlabelledVO.getLabelledFile();
    if (rawLabelled == null) {
        // A missing list is treated like an empty file instead of failing with an NPE.
        throw new MyServiceException("DS001", "Arff文件为空");
    }
    List<String> labelledFile = handleList(rawLabelled);
    if (labelledFile.isEmpty()) {
        throw new MyServiceException("DS001", "Arff文件为空");
    }

    List<String> rawUnlabelled = mergeLabelledAndUnlabelledVO.getUnlabelledFile();
    if (rawUnlabelled == null) {
        throw new MyServiceException("DS001", "Arff文件为空");
    }
    List<String> unlabelledFile = handleList(rawUnlabelled);
    if (unlabelledFile.isEmpty()) {
        throw new MyServiceException("DS001", "Arff文件为空");
    }

    try {
        // Merge exactly the lists that were cleaned and validated above.
        return uk.ac.wlv.wkaclass.Arff.mergeLabelledAndUnlabelledTextFiles(labelledFile, unlabelledFile);
    } catch (Exception e) {
        // NOTE(review): the original cause is dropped here; attach `e` if
        // MyServiceException offers a constructor that accepts a cause.
        throw new MyServiceException("DS009", "合并标记好的和未标记好的文件出错");
    }
}
/**
* 删除arff文件中的空行
* @param inFile 输入的Arff文件若干行
* @return List<String> 删除后的Arff文件若干行
*/
public List<String> handleList(List<String> inFile){
for(int i = 0; i < inFile.size(); i++){
if(inFile.get(i).equals("")){
......@@ -182,4 +221,72 @@ public class DataMiningServiceImpl implements DataMiningService {
}
return inFile;
}
/**
 * Predicts an unlabelled file from a training set and a classifier.
 *
 * @param predictArffClassVO request object carrying the training lines, the classifier
 *                           name, the unlabelled lines and the classFor0 value
 * @return List<String> the result file
 * @throws MyServiceException with code DS010 when the underlying prediction throws
 */
@Override
public List<String> predictArffClass(PredictArffClassVO predictArffClassVO) {
    try {
        return uk.ac.wlv.wkaclass.PredictClass.predictArffClass(
                predictArffClassVO.getInFle(),
                predictArffClassVO.getClassifier(),
                predictArffClassVO.getUnlabelledFile(),
                predictArffClassVO.getClassFor0());
    } catch (Exception failure) {
        throw new MyServiceException("DS010", "根据训练集预测未分类文件出错");
    }
}
/**
 * Trains a classifier on the supplied training file, tests it on the supplied test
 * file, and produces a result file and a summary file.
 *
 * @param directClassifyAllArffVO request object carrying the training file name, the
 *                                training and test lines, and the classifier names
 * @return Map<List<String>,List<String>> the result file and the summary file
 * @throws MyServiceException with code DS011 when the underlying call throws
 */
@Override
public Map<List<String>, List<String>> directClassifyAllArff(DirectClassifyAllArffVO directClassifyAllArffVO) {
    try {
        return uk.ac.wlv.wkaclass.WekaDirectTrainClassifyEvaluate.directClassifyAllArff(
                directClassifyAllArffVO.getTrainFileName(),
                directClassifyAllArffVO.getTrainFile(),
                directClassifyAllArffVO.getEvalFile(),
                directClassifyAllArffVO.getClassifierName(),
                directClassifyAllArffVO.getClassifierExclude());
    } catch (Exception failure) {
        throw new MyServiceException("DS011", "根据输入的训练文件和测试文件训练分类器并测试出错");
    }
}
/**
 * Runs the information-gain cross-validation with feature selection.
 *
 * @param classifyAllArffVO request object carrying the input file name and lines, the
 *                          classifier names and the number of features
 * @return Map<List<String>, List<String>> the generated result and summary files
 * @throws MyServiceException with code DS012 when the underlying call throws
 */
@Override
public Map<List<String>, List<String>> classifyAllArff(ClassifyAllArffVO classifyAllArffVO) {
    // A fresh random seed is drawn for every request, before the guarded call.
    int seed = new Random().nextInt();
    try {
        return uk.ac.wlv.wkaclass.WekaCrossValidateInfoGain.classifyArff(
                classifyAllArffVO.getInFileName(),
                classifyAllArffVO.getInFile(),
                classifyAllArffVO.getClassifierName(),
                classifyAllArffVO.getClassifierExclude(),
                seed,
                classifyAllArffVO.getFeatures());
    } catch (Exception failure) {
        throw new MyServiceException("DS012", "有选择的信息增益交叉验证出错");
    }
}
/**
 * Runs the information-gain cross-validation without feature selection.
 *
 * @param classifyAllArffWithoutSelectionVO request object carrying the input file name
 *                                          and lines, and the classifier names
 * @return Map<List<String>, List<String>> the generated result and summary files
 * @throws MyServiceException with code DS013 when the underlying call throws
 */
@Override
public Map<List<String>, List<String>> classifyAllArffWithoutSelection(ClassifyAllArffWithoutSelectionVO classifyAllArffWithoutSelectionVO) {
    // A fresh random seed is drawn for every request, before the guarded call.
    int seed = new Random().nextInt();
    try {
        return uk.ac.wlv.wkaclass.WekaCrossValidateNoSelection.classifyAllArff(
                classifyAllArffWithoutSelectionVO.getInFileName(),
                classifyAllArffWithoutSelectionVO.getInFile(),
                classifyAllArffWithoutSelectionVO.getClassifierName(),
                classifyAllArffWithoutSelectionVO.getClassifierExclude(),
                seed);
    } catch (Exception failure) {
        throw new MyServiceException("DS013", "无选择的信息增益交叉验证出错");
    }
}
}
......@@ -4,7 +4,7 @@ package com.sentistrength.service.Interface.dataMining;
import com.sentistrength.model.vo.dataMining.*;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
public interface DataMiningService {
/**
......@@ -56,8 +56,36 @@ public interface DataMiningService {
void setOptions(SetOptionsVO setOptionsVO);
/**
 * Merges a labelled file with an unlabelled file.
 * @param mergeLabelledAndUnlabelledVO value object for merging the labelled and unlabelled files
 * @return List<String> the lines of the merged result file
 */
List<String> mergeLabelledAndUnlabelled(MergeLabelledAndUnlabelledVO mergeLabelledAndUnlabelledVO);
/**
 * Predicts an unlabelled file from a training set and a classifier.
 * @param predictArffClassVO value object for predicting an unlabelled file from a training set and a classifier
 * @return List<String> the lines of the generated result file
 */
List<String> predictArffClass(PredictArffClassVO predictArffClassVO);
/**
 * Trains a classifier on the supplied training file, tests it on the supplied test file,
 * and produces a result file and a summary file.
 * @param directClassifyAllArffVO value object for training and testing a classifier on the supplied training and test files
 * @return the generated result file and summary file
 */
Map<List<String>,List<String>> directClassifyAllArff(DirectClassifyAllArffVO directClassifyAllArffVO);
/**
 * Runs the information-gain cross-validation with feature selection.
 * @param classifyAllArffVO value object for the cross-validation with feature selection
 * @return Map<List<String>, List<String>> the generated result and summary files
 */
Map<List<String>,List<String>> classifyAllArff(ClassifyAllArffVO classifyAllArffVO);
/**
 * Runs the information-gain cross-validation without feature selection.
 * @param classifyAllArffWithoutSelectionVO value object for the cross-validation without feature selection
 * @return Map<List<String>, List<String>> the generated result and summary files
 */
Map<List<String>,List<String>> classifyAllArffWithoutSelection(ClassifyAllArffWithoutSelectionVO classifyAllArffWithoutSelectionVO);
}
......@@ -108,4 +108,44 @@ public class DataMiningController {
public Response mergeUnlabelledAndLabelled(@RequestBody MergeLabelledAndUnlabelledVO mergeLabelledAndUnlabelledVO){
// Delegates the merge to the data-mining service and wraps its result in a success response.
return Response.buildSuccess(dataMiningService.mergeLabelledAndUnlabelled(mergeLabelledAndUnlabelledVO));
}
/**
 * Predicts an unlabelled file based on a training set.
 * Delegates to the data-mining service and wraps its result in a success response.
 * @param predictArffClassVO request body for predicting an unlabelled file based on a training set
 * @return Response the information returned to the front end
 */
@PostMapping("/predictArffClass")
public Response predictArffClass(@RequestBody PredictArffClassVO predictArffClassVO) {
return Response.buildSuccess(dataMiningService.predictArffClass(predictArffClassVO));
}
/**
 * Trains a classifier on the supplied training file, tests it on the supplied test file,
 * and produces a result file and a summary file.
 * Delegates to the data-mining service and wraps its result in a success response.
 *
 * @param directClassifyAllArffVO request body for training and testing a classifier on
 *                                the supplied training and test files
 * @return Response the information returned to the front end
 */
@PostMapping("/directClassifyAllArff")
public Response directClassifyAllArff(@RequestBody DirectClassifyAllArffVO directClassifyAllArffVO) {
    return Response.buildSuccess(dataMiningService.directClassifyAllArff(directClassifyAllArffVO));
}
/**
 * Runs the information-gain cross-validation with feature selection.
 * Delegates to the data-mining service and wraps its result in a success response.
 *
 * @param classifyAllArffVO request body for the cross-validation with feature selection
 * @return Response the information returned to the front end
 */
@PostMapping("/classifyAllArff")
public Response classifyAllArff(@RequestBody ClassifyAllArffVO classifyAllArffVO) {
    return Response.buildSuccess(dataMiningService.classifyAllArff(classifyAllArffVO));
}
/**
 * Runs the information-gain cross-validation without feature selection.
 * Delegates to the data-mining service and wraps its result in a success response.
 *
 * @param classifyAllArffWithoutSelectionVO request body for the cross-validation
 *                                          without feature selection
 * @return Response the information returned to the front end
 */
@PostMapping("/classifyAllArffWithoutSelection")
public Response classifyAllArffWithoutSelection(@RequestBody ClassifyAllArffWithoutSelectionVO classifyAllArffWithoutSelectionVO) {
    return Response.buildSuccess(dataMiningService.classifyAllArffWithoutSelection(classifyAllArffWithoutSelectionVO));
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment