Java源码示例:org.dmg.pmml.clustering.ClusteringField
示例1
/**
 * Selects the clustering fields of the model whose center field attribute is {@code TRUE}.
 *
 * @return The matching clustering fields, in the order in which the model lists them.
 *
 * @throws UnsupportedAttributeException If a center field attribute is neither {@code TRUE} nor {@code FALSE}.
 */
private List<ClusteringField> getCenterClusteringFields(){
	ClusteringModel model = getModel();

	List<ClusteringField> candidates = model.getClusteringFields();

	// Upper bound on the result size: every candidate could be a center field
	List<ClusteringField> centerFields = new ArrayList<>(candidates.size());

	for(ClusteringField candidate : candidates){
		ClusteringField.CenterField flag = candidate.getCenterField();

		switch(flag){
			case FALSE:
				// Not a center field; leave it out
				break;
			case TRUE:
				centerFields.add(candidate);
				break;
			default:
				throw new UnsupportedAttributeException(candidate, flag);
		}
	}

	return centerFields;
}
示例2
/**
 * Builds a center-based PMML {@code ClusteringModel} from a trained k-means model.
 *
 * <p>One {@code ClusteringField} (flagged as a center field) is emitted per active feature
 * of the input schema, and one {@code Cluster} per cluster center of the model. Cluster IDs
 * are the string form of the center's index.</p>
 *
 * @param model trained k-means model supplying the cluster centers
 * @param clusterSizesMap cluster sizes keyed by cluster index; an index without an entry
 *  is reported with size 0
 * @return the PMML clustering model, using squared Euclidean distance as comparison measure
 */
private ClusteringModel pmmlClusteringModel(KMeansModel model,
                                            Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();

  List<String> featureNames = inputSchema.getFeatureNames();
  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
    if (inputSchema.isActive(i)) {
      FieldName fieldName = FieldName.create(featureNames.get(i));
      ClusteringField clusteringField =
          new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
      clusteringFields.add(clusteringField);
    }
  }

  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  for (int i = 0; i < clusterCenters.length; i++) {
    // A cluster index may be missing from the size map (presumably when no points were
    // assigned to it -- confirm against the caller). Report size 0 in that case rather
    // than failing with a NullPointerException while unboxing.
    Long recordedSize = clusterSizesMap.get(i);
    int clusterSize = recordedSize == null ? 0 : recordedSize.intValue();
    clusters.add(new Cluster().setId(Integer.toString(i))
                     .setSize(clusterSize)
                     .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
  }

  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean()),
      clusteringFields,
      clusters);
}
示例3
/**
 * Computes the distance between the given field values and every cluster of the model.
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param comparisonMeasure The comparison measure to apply.
 * @param clusteringFields The clustering fields that the values correspond to.
 * @param values The field values to compare against each cluster.
 *
 * @return A distance-typed affinity distribution holding one distance per cluster.
 *
 * @throws InvalidElementException If the number of missing value weights, or the number of values cached for a cluster,
 * differs from the number of field values.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateDistance(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	ClusteringModel model = getModel();

	List<Cluster> clusters = model.getClusters();

	MissingValueWeights weights = model.getMissingValueWeights();

	Value<V> adjustment;

	if(weights == null){
		// No explicit weights declared; use the two-argument adjustment calculation
		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values);
	} else {
		List<? extends Number> weightValues = ArrayUtil.asNumberList(weights.getArray());

		// There must be exactly one weight per field value
		if(weightValues.size() != values.size()){
			throw new InvalidElementException(weights);
		}

		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values, weightValues);
	}

	ClusterAffinityDistribution<V> distances = createClusterAffinityDistribution(Classification.Type.DISTANCE, clusters);

	for(Cluster cluster : clusters){
		List<FieldValue> centerValues = CacheUtil.getValue(cluster, ClusteringModelEvaluator.clusterValueCache);

		// The cluster must provide exactly one value per field value
		if(centerValues.size() != values.size()){
			throw new InvalidElementException(cluster);
		}

		distances.put(cluster, MeasureUtil.evaluateDistance(valueFactory, comparisonMeasure, clusteringFields, values, centerValues, adjustment));
	}

	return distances;
}
示例4
/**
 * Creates one clustering field per given name, preserving the order of the names.
 *
 * @param names The field names.
 *
 * @return A new mutable list of clustering fields, one for each name.
 */
static
private List<ClusteringField> createClusteringFields(String... names){
	int count = names.length;

	List<ClusteringField> fields = new ArrayList<>(count);

	for(int i = 0; i < count; i++){
		FieldName fieldName = FieldName.create(names[i]);

		fields.add(new ClusteringField(fieldName));
	}

	return fields;
}
示例5
/**
 * Builds a PMML document containing a small, hard-coded clustering model.
 *
 * <p>The model is declared over two continuous double fields, "x" and "y", both active
 * mining fields and both center clustering fields. It holds three clusters with IDs
 * "0", "1" and "2", and uses squared Euclidean distance as its comparison measure.</p>
 *
 * @return the skeleton PMML with data dictionary and clustering model added
 */
public static PMML buildDummyClusteringModel() {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  List<DataField> dataFields = new ArrayList<>();
  List<MiningField> miningFields = new ArrayList<>();
  List<ClusteringField> clusteringFields = new ArrayList<>();

  // Each feature needs a data dictionary entry, a mining field and a clustering field
  for (String feature : new String[] {"x", "y"}) {
    dataFields.add(
        new DataField(FieldName.create(feature), OpType.CONTINUOUS, DataType.DOUBLE));
    miningFields.add(
        new MiningField(FieldName.create(feature))
            .setOpType(OpType.CONTINUOUS)
            .setUsageType(MiningField.UsageType.ACTIVE));
    clusteringFields.add(
        new ClusteringField(FieldName.create(feature))
            .setCenterField(ClusteringField.CenterField.TRUE));
  }

  DataDictionary dictionary = new DataDictionary(dataFields);
  dictionary.setNumberOfFields(dataFields.size());
  pmml.setDataDictionary(dictionary);

  MiningSchema schema = new MiningSchema(miningFields);

  // Cluster IDs, sizes and center coordinates are fixed dummy values
  List<Cluster> clusters = new ArrayList<>();
  clusters.add(new Cluster().setId("0").setSize(1).setArray(AppPMMLUtils.toArray(1.0, 0.0)));
  clusters.add(new Cluster().setId("1").setSize(2).setArray(AppPMMLUtils.toArray(2.0, -1.0)));
  clusters.add(new Cluster().setId("2").setSize(3).setArray(AppPMMLUtils.toArray(-1.0, 0.0)));

  ComparisonMeasure measure =
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());

  ClusteringModel clusteringModel = new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      schema,
      measure,
      clusteringFields,
      clusters);
  pmml.addModels(clusteringModel);

  return pmml;
}
示例6
/**
 * Passes the field referenced by the clustering field to {@code process}
 * before delegating to the superclass visit.
 *
 * @param clusteringField The clustering field being visited.
 *
 * @return The action returned by the superclass visit.
 */
@Override
public VisitorAction visit(ClusteringField clusteringField){
	process(clusteringField.getField());

	VisitorAction action = super.visit(clusteringField);

	return action;
}
示例7
/**
 * Evaluates the clustering model against the field values available from the evaluation context.
 *
 * <p>The value of every center clustering field is evaluated first. The values are then compared
 * against the clusters using either the similarity or the distance evaluation, depending on
 * the kind of measure that the comparison measure holds.</p>
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param context The evaluation context used for evaluating field values.
 *
 * @return A singleton map from the target name to the computed cluster affinity distribution.
 *
 * @throws MissingAttributeException If a center clustering field does not reference a field.
 * @throws UnsupportedElementException If the measure is neither a similarity nor a distance measure.
 */
@Override
protected <V extends Number> Map<FieldName, ClusterAffinityDistribution<V>> evaluateClustering(ValueFactory<V> valueFactory, EvaluationContext context){
	ClusteringModel model = getModel();

	ComparisonMeasure comparisonMeasure = model.getComparisonMeasure();

	List<ClusteringField> centerFields = getCenterClusteringFields();

	List<FieldValue> fieldValues = new ArrayList<>(centerFields.size());

	for(ClusteringField centerField : centerFields){
		FieldName fieldName = centerField.getField();

		if(fieldName == null){
			throw new MissingAttributeException(centerField, PMMLAttributes.CLUSTERINGFIELD_FIELD);
		}

		fieldValues.add(context.evaluate(fieldName));
	}

	Measure measure = MeasureUtil.ensureMeasure(comparisonMeasure);

	ClusterAffinityDistribution<V> affinities;

	if(measure instanceof Similarity){
		affinities = evaluateSimilarity(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else if(measure instanceof Distance){
		affinities = evaluateDistance(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else {
		throw new UnsupportedElementException(measure);
	}

	// "For clustering models, the identifier of the winning cluster is returned as the predictedValue"
	affinities.computeResult(DataType.STRING);

	return Collections.singletonMap(getTargetName(), affinities);
}
示例8
/**
 * Computes the similarity between the given field values and every cluster of the model.
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param comparisonMeasure The comparison measure to apply.
 * @param clusteringFields The clustering fields that the values correspond to.
 * @param values The field values; they are converted to a bit set before comparison.
 *
 * @return A similarity-typed affinity distribution holding one similarity per cluster.
 *
 * @throws InvalidElementException If the bit set cached for a cluster reports a different size than the bit set of the field values.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateSimilarity(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	ClusteringModel model = getModel();

	List<Cluster> clusters = model.getClusters();

	ClusterAffinityDistribution<V> similarities = createClusterAffinityDistribution(Classification.Type.SIMILARITY, clusters);

	BitSet valueFlags = MeasureUtil.toBitSet(values);

	for(Cluster cluster : clusters){
		BitSet centerFlags = CacheUtil.getValue(cluster, ClusteringModelEvaluator.clusterFlagCache);

		// NOTE(review): BitSet#size() reports the allocated capacity in bits, not the number of flags,
		// so this check can only detect coarse mismatches - confirm whether a stricter check is intended
		if(centerFlags.size() != valueFlags.size()){
			throw new InvalidElementException(cluster);
		}

		similarities.put(cluster, MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, valueFlags, centerFlags));
	}

	return similarities;
}
示例9
/**
 * Checks the similarity computed for a fixed pair of four-element flag lists
 * under the simple matching, Jaccard, Tanimoto and binary similarity measures.
 */
@Test
public void evaluateSimilarity(){
	// The two flag lists agree at the first and last position, and disagree at the two middle positions
	BitSet actualFlags = createFlags(Arrays.asList(0, 0, 1, 1));
	BitSet expectedFlags = createFlags(Arrays.asList(0, 1, 0, 1));

	ValueFactory<?> valueFactory = MeasureUtilTest.valueFactoryFactory.newValueFactory(MathContext.DOUBLE);

	List<ClusteringField> fields = createClusteringFields("one", "two", "three", "four");

	// The same comparison measure instance is reused; only its measure is swapped between assertions
	ComparisonMeasure similarityMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.SIMILARITY, new SimpleMatching());

	assertEquals(valueFactory.newValue(2d / 4d), MeasureUtil.evaluateSimilarity(valueFactory, similarityMeasure, fields, actualFlags, expectedFlags));

	similarityMeasure.setMeasure(new Jaccard());

	assertEquals(valueFactory.newValue(1d / 3d), MeasureUtil.evaluateSimilarity(valueFactory, similarityMeasure, fields, actualFlags, expectedFlags));

	similarityMeasure.setMeasure(new Tanimoto());

	assertEquals(valueFactory.newValue(2d / (1d + 2 * 2d + 1d)), MeasureUtil.evaluateSimilarity(valueFactory, similarityMeasure, fields, actualFlags, expectedFlags));

	similarityMeasure.setMeasure(new BinarySimilarity(0.5d, 0.5d, 0.5d, 0.5d, 1d, 1d, 1d, 1d));

	assertEquals(valueFactory.newValue(2d / 4d), MeasureUtil.evaluateSimilarity(valueFactory, similarityMeasure, fields, actualFlags, expectedFlags));
}