Miguel Roca
Migue
Preliminary crysta
Dental fluorosis a
In this series, I
On Wednesday, the
Q:
how to remove
{
"action": {
Ford announced thi
Q:
Adding custom
Terry Farrell (dir
package com.alibaba.alink.operator.common.clustering.randomclusters;
import com.alibaba.alink.operator.common.clustering.ClusterAssignmentModel;
import com.alibaba.alink.operator.common.clustering.ClusterModel;
import com.alibaba.alink.operator.common.distance.ClusterDistanceMetric;
import com.alibaba.alink.params.clustering.KMeansParams;
import com.alibaba.alink.params.clustering.CentroidInitialization;
import com.alibaba.alink.params.clustering.ClusteringMetricType;
import com.alibaba.alink.params.clustering.KMeansDistanceParams;
import com.alibaba.alink.params.clustering.KMeansWeightParam;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class RandomClusterAssigner extends RandomClusteringAlgorithm {
/** Mode identifier string. Not referenced in the visible part of this file. */
public static final String MODE_USE_LOCAL_CLUSTER = "use_local_cluster";
/** Starts at 0. Not read or written in the visible part of this file. */
protected int currentClusterId = 0;
/** Assignment model; created in run() from the column name, file list and temp directory. */
protected ClusterAssignmentModel clusterAssignment;
/** Set in run() to the size of the random sample. */
protected int numSampleFeatures = 0;
/** Temp directory path; set by the constructor and overwritten in run() from args[1]. */
protected String tempDir;
/** File list; set by the constructor and rebuilt in run() as a copy of the argument array. */
protected String[] fileList;
/** Starts at 0. Not read or written in the visible part of this file. */
private int numTopCluster = 0;
// NOTE(review): this declaration is not valid Java as written. The stray '>' suggests the
// generic type arguments were lost (presumably something of the form HashMap<K, V<T>>);
// restore them from the original source. The same damage appears where run() instantiates it.
protected HashMap> labelmap;
/** Set in run() to "model" + base name of modelPath + ".txt". */
protected String fileName;
/** Model path; set by the constructor and overwritten in run() from args[0]. */
private String modelPath;
/**
 * Creates an assigner with no model path, file list, temp directory or parameter objects.
 *
 * <p>Delegates to the full constructor with {@code null} for every reference argument and
 * {@code 0} for the iteration count, so the iteration count falls back to 20.
 */
public RandomClusterAssigner() {
    // The full constructor takes eight arguments and its last one is a primitive int,
    // so it must be given an explicit number rather than null.
    this(null, null, null, null, null, null, null, 0);
}

/**
 * Creates an assigner and, when a k-means parameter object is supplied, configures it.
 *
 * <p>Note that {@code kmsparams} is mutated in place: its distance metric type and
 * centroid initializer are set by this constructor.
 *
 * @param modelPath      stored as the model path; may be {@code null}
 * @param fileList       not stored by this constructor (the {@code fileList} field is a
 *                       {@code String[]}, see {@code fileList1}).
 *                       NOTE(review): confirm whether this string form is still needed.
 * @param tempDir        stored as the temp directory; may be {@code null}
 * @param fileList1      stored as the file list; may be {@code null}
 * @param kmdparams      when non-null, a clone of it supplies the centroid initializer and
 *                       the metric type ends up as {@code CLUSTER_EMBEDDING}; when
 *                       {@code null}, the metric type is {@code WEIGHTED} and the
 *                       initializer is {@code null}
 * @param weightparam    not used by this constructor
 * @param kmsparams      parameter object to configure; when {@code null} the configuration
 *                       step is skipped entirely
 * @param numOfIteration iteration count; any value {@code <= 0} is replaced by 20
 */
public RandomClusterAssigner(String modelPath, String fileList, String tempDir, String[] fileList1,
        KMeansDistanceParams kmdparams, KMeansWeightParam weightparam, KMeansParams kmsparams,
        int numOfIteration) {
    this.tempDir = tempDir;
    // The field is a String[]; fileList1 is the only argument with a compatible type.
    this.fileList = fileList1;
    this.modelPath = modelPath;

    // The no-arg constructor passes null for kmsparams, so only configure it when present.
    if (kmsparams != null) {
        if (kmdparams != null) {
            KMeansDistanceParams localParams = kmdparams.clone();
            localParams.setCenterNumber(kmdparams.centerNumber);
            // NOTE(review): WEIGHTED is set and then replaced by CLUSTER_EMBEDDING below.
            // The call order is kept as-is in case setKmInitializer depends on the metric
            // type that is current at that point - confirm and drop the first call if not.
            kmsparams.setDistanceMetricType(ClusteringMetricType.WEIGHTED);
            kmsparams.setKmInitializer((CentroidInitialization) localParams.getCentroidInitialization());
            kmsparams.setDistanceMetricType(ClusteringMetricType.CLUSTER_EMBEDDING);
        } else {
            kmsparams.setDistanceMetricType(ClusteringMetricType.WEIGHTED);
            kmsparams.setKmInitializer((CentroidInitialization) null);
        }
    }

    // Non-positive iteration counts fall back to the default of 20.
    this.numOfIteration = numOfIteration > 0 ? numOfIteration : 20;
}
@Override
public void run(String[] args) throws Exception {
String userCols = "categorical_cols";
if (args != null) {
userCols = args[0];
}
if (args != null && args.length > 0) {
try {
String modelPath1 = args[0];
if (!new File(modelPath1).exists()) {
new File(modelPath1).createNewFile();
}
modelPath = modelPath1;
if (args.length == 1) {
fileList = new String[args.length];
for (int i = 0; i < args.length; i++) {
fileList[i] = args[i];
}
} else if (args.length > 1) {
fileList = new String[args.length];
for (int i = 0; i < args.length; i++) {
fileList[i] = args[i];
}
}
tempDir = args[1];
clusterAssignment = new ClusterAssignmentModel(userCols, fileList, tempDir);
System.out.println("clusterAssignment: " + clusterAssignment);
fileName = "model" + FileUtils.getBaseName(modelPath) + ".txt";
labelmap = new HashMap>();
List allFiles = new ArrayList();
FileUtils.recursiveGetFileTree(new File(tempDir), new String[] { fileList[0] }, new File(fileName));
for (String string : fileList[0].split(",")) {
allFiles.add(string);
}
RandomSample sample = new RandomSample(allFiles);
numSampleFeatures = sample.size();
clusterAssignment.buildModel(sample.getSampleFeatures(), sample.getSampleLabels(), numSampleFeatures);
clusterAssignment.loadModel(new File(fileName));
String fileList1 = null;
int nclass = 10;
if (args.length > 2) {
nclass = Integer.parseInt(args[2]);
}
String[] allFiles1 = sample.getSampleLabels().split(",");
String[][] tempAllFiles = new String[allFiles.size()][];
for (int i = 0; i < allFiles.size(); i++) {
tempAllFiles[i] = new String[]{fileList[0], modelPath + File.separator + "cluster_" + (i + 1) + ".txt"};
List tmp = sample.sampleSampled(tempAllFiles[i], nclass, kmdparams.numOfCenters);
int index = 0;
for (String s : tmp) {
clusterAssignment.addFeature(index++, new String[]{s});
}
if (tmp.size() >= 2) {
int[] map1 = new int[clusterAssignment.getNumberOfFeatures()];
for (int i = 0; i < clusterAssignment.getNumberOfFeatures(); i++) {
map1[i] = 0;
}
for (int i = 0; i < tempAllFiles[i].length; i++) {
String[] str1 = tempAllFiles[i][i].split(",");
clusterAssignment.addFeature(index++, new String[]{str1[0], str1[1]});
String[] str2 = clusterAssignment.getLabelColumn().split(",");
String[] str2a = str2[0].split(",");
String[] str2b = str2[1].split(",");
if (!fileName.endsWith("0") && map1[str2b[1].split(",")[0]] == 0) {
System.out.println("The features in the row:" + str2a[0] + " " + str2a[1] + " do not exists in the current model, will be added.");
map1[str2b[1].split(",")[0]]++;
}
if (!fileName.endsWith("0") && str2[0].endsWith(fileList[0])) {
System.out.println("The feature in the row:" + str2[0] + " " + str2[1] + " exists in the current model and is located in the last position, not added.");
}
if (!fileName.endsWith("0") && str2[0].endsWith(fileList1[0])) {
System.out.println("The feature in the row:" + str2[0] + " " + str2[1] + " exists in the current model and is located in the first position, not added.");
}
if (!fileName.endsWith("0") && str2[0].endsWith(fileList1[0]) && str2[1].endsWith(fileList1[1])) {
System.out.println("The feature in the row:" + str2[0] + " " + str2[1] + " exists in the current model and is located in the first position, not added.");
}
if (str2a[0].endsWith(fileList1[0]) || str2a[1].endsWith(fileList1[1])) {
System.out.println("The features in the row:" + str2a[0] + " " + str2a[1] + " exist in