Java源码示例:org.dmg.pmml.clustering.ClusteringField

示例1
/**
 * Selects the clustering fields of the current model that are marked as center fields.
 *
 * @return the clustering fields whose center field attribute is {@code TRUE}, in model order.
 * @throws UnsupportedAttributeException if a center field attribute is neither {@code TRUE} nor {@code FALSE}.
 */
private List<ClusteringField> getCenterClusteringFields(){
	List<ClusteringField> candidates = getModel().getClusteringFields();

	// Sized for the case where every clustering field is a center field
	List<ClusteringField> centerFields = new ArrayList<>(candidates.size());

	for(ClusteringField candidate : candidates){
		ClusteringField.CenterField flag = candidate.getCenterField();

		switch(flag){
			case FALSE:
				// Not a center field; leave it out of the result
				break;
			case TRUE:
				centerFields.add(candidate);
				break;
			default:
				throw new UnsupportedAttributeException(candidate, flag);
		}
	}

	return centerFields;
}
 
示例2
/**
 * Builds a center-based PMML {@link ClusteringModel} from the given k-means model.
 *
 * <p>One {@link ClusteringField} (flagged as a center field) is emitted per active feature of
 * {@code inputSchema}, and one {@link Cluster} per cluster center. Cluster IDs are the
 * zero-based center indices rendered as strings. The model uses a squared Euclidean distance
 * comparison measure.
 *
 * @param model k-means model supplying the cluster centers
 * @param clusterSizesMap sizes keyed by cluster index; an index with no entry (or a null
 *  value) is written with size 0 rather than failing on null unboxing
 * @return the assembled clustering model
 */
private ClusteringModel pmmlClusteringModel(KMeansModel model,
                                            Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();

  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
    if (inputSchema.isActive(i)) {
      FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(i));
      ClusteringField clusteringField =
          new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
      clusteringFields.add(clusteringField);
    }
  }

  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  for (int i = 0; i < clusterCenters.length; i++) {
    // Map.get returns null for an absent key; presumably that means no points were
    // assigned to this cluster, so report it as empty instead of throwing an NPE.
    Long clusterSize = clusterSizesMap.get(i);
    int size = clusterSize == null ? 0 : clusterSize.intValue();
    clusters.add(new Cluster().setId(Integer.toString(i))
                     .setSize(size)
                     .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
  }

  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean()),
      clusteringFields,
      clusters);
}
 
示例3
/**
 * Computes the distance between the given field values and every cluster of the current model.
 *
 * @param valueFactory factory for the numeric values used in the computation.
 * @param comparisonMeasure the comparison measure handed to {@code MeasureUtil.evaluateDistance}.
 * @param clusteringFields the clustering fields that the values correspond to.
 * @param values the evaluated field values.
 * @return a distance-typed affinity distribution holding one distance per cluster.
 * @throws InvalidElementException if the missing value weights or a cluster do not provide
 *  exactly as many entries as there are values.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateDistance(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	ClusteringModel model = getModel();

	List<Cluster> clusters = model.getClusters();

	MissingValueWeights weights = model.getMissingValueWeights();

	Value<V> adjustment;

	if(weights == null){
		// No explicit weights declared by the model
		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values);
	} else {
		List<? extends Number> weightValues = ArrayUtil.asNumberList(weights.getArray());

		// There must be exactly one weight per value
		if(weightValues.size() != values.size()){
			throw new InvalidElementException(weights);
		}

		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values, weightValues);
	}

	ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.DISTANCE, clusters);

	for(Cluster candidate : clusters){
		List<FieldValue> centerValues = CacheUtil.getValue(candidate, ClusteringModelEvaluator.clusterValueCache);

		// There must be exactly one cluster value per input value
		if(centerValues.size() != values.size()){
			throw new InvalidElementException(candidate);
		}

		distribution.put(candidate, MeasureUtil.evaluateDistance(valueFactory, comparisonMeasure, clusteringFields, values, centerValues, adjustment));
	}

	return distribution;
}
 
示例4
/**
 * Creates one clustering field per given name, in argument order.
 *
 * @param names the field names.
 * @return a new mutable list with one {@link ClusteringField} per name.
 */
private static List<ClusteringField> createClusteringFields(String... names){
	List<ClusteringField> fields = new ArrayList<>(names.length);

	for(int i = 0; i < names.length; i++){
		fields.add(new ClusteringField(FieldName.create(names[i])));
	}

	return fields;
}
 
示例5
/**
 * Builds a small fixed PMML document containing one center-based clustering model.
 *
 * <p>The model has two continuous, active features named "x" and "y" and three clusters
 * with IDs "0", "1" and "2", compared by squared Euclidean distance.
 *
 * @return the populated PMML document
 */
public static PMML buildDummyClusteringModel() {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  List<DataField> dataFields = new ArrayList<>();
  List<MiningField> miningFields = new ArrayList<>();
  List<ClusteringField> clusteringFields = new ArrayList<>();

  // Each feature gets a data dictionary entry, a mining field and a clustering field
  for (String featureName : new String[] {"x", "y"}) {
    dataFields.add(
        new DataField(FieldName.create(featureName), OpType.CONTINUOUS, DataType.DOUBLE));
    miningFields.add(new MiningField(FieldName.create(featureName))
        .setOpType(OpType.CONTINUOUS).setUsageType(MiningField.UsageType.ACTIVE));
    clusteringFields.add(new ClusteringField(
        FieldName.create(featureName)).setCenterField(ClusteringField.CenterField.TRUE));
  }

  pmml.setDataDictionary(
      new DataDictionary(dataFields).setNumberOfFields(dataFields.size()));

  MiningSchema miningSchema = new MiningSchema(miningFields);

  List<Cluster> clusters = new ArrayList<>();
  clusters.add(new Cluster().setId("0").setSize(1).setArray(AppPMMLUtils.toArray(1.0, 0.0)));
  clusters.add(new Cluster().setId("1").setSize(2).setArray(AppPMMLUtils.toArray(2.0, -1.0)));
  clusters.add(new Cluster().setId("2").setSize(3).setArray(AppPMMLUtils.toArray(-1.0, 0.0)));

  ComparisonMeasure comparisonMeasure =
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());

  ClusteringModel clusteringModel = new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      miningSchema,
      comparisonMeasure,
      clusteringFields,
      clusters);
  pmml.addModels(clusteringModel);

  return pmml;
}
 
示例6
/**
 * Passes the field referenced by the clustering field to {@code process}, then continues
 * with the inherited visit logic.
 *
 * @param clusteringField the clustering field being visited.
 * @return the action returned by the superclass visit.
 */
@Override
public VisitorAction visit(ClusteringField clusteringField){
	// Handle the field reference before delegating to the superclass
	process(clusteringField.getField());

	VisitorAction action = super.visit(clusteringField);

	return action;
}
 
示例7
/**
 * Evaluates the clustering model against the given context.
 *
 * <p>The value of every center clustering field is evaluated, and the values are then scored
 * against the clusters using either the similarity or the distance code path, depending on
 * the measure of the model's comparison measure.
 *
 * @param valueFactory factory for the numeric values used in the computation.
 * @param context the evaluation context that supplies field values.
 * @return a single-entry map from the target name to the computed affinity distribution.
 * @throws MissingAttributeException if a clustering field does not reference a field.
 * @throws UnsupportedElementException if the measure is neither a similarity nor a distance.
 */
@Override
protected <V extends Number> Map<FieldName, ClusterAffinityDistribution<V>> evaluateClustering(ValueFactory<V> valueFactory, EvaluationContext context){
	ClusteringModel model = getModel();

	ComparisonMeasure comparisonMeasure = model.getComparisonMeasure();

	List<ClusteringField> centerFields = getCenterClusteringFields();

	List<FieldValue> fieldValues = new ArrayList<>(centerFields.size());

	for(ClusteringField centerField : centerFields){
		FieldName fieldName = centerField.getField();

		if(fieldName == null){
			throw new MissingAttributeException(centerField, PMMLAttributes.CLUSTERINGFIELD_FIELD);
		}

		fieldValues.add(context.evaluate(fieldName));
	}

	Measure measure = MeasureUtil.ensureMeasure(comparisonMeasure);

	ClusterAffinityDistribution<V> distribution;

	if(measure instanceof Similarity){
		distribution = evaluateSimilarity(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else if(measure instanceof Distance){
		distribution = evaluateDistance(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else {
		throw new UnsupportedElementException(measure);
	}

	// "For clustering models, the identifier of the winning cluster is returned as the predictedValue"
	distribution.computeResult(DataType.STRING);

	return Collections.singletonMap(getTargetName(), distribution);
}
 
示例8
/**
 * Computes the similarity between the given field values and every cluster of the current model.
 *
 * @param valueFactory factory for the numeric values used in the computation.
 * @param comparisonMeasure the comparison measure handed to {@code MeasureUtil.evaluateSimilarity}.
 * @param clusteringFields the clustering fields that the values correspond to.
 * @param values the evaluated field values, converted to bit flags before comparison.
 * @return a similarity-typed affinity distribution holding one similarity per cluster.
 * @throws InvalidElementException if the bit set sizes of the values and a cluster differ.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateSimilarity(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	List<Cluster> clusters = getModel().getClusters();

	ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.SIMILARITY, clusters);

	BitSet valueFlags = MeasureUtil.toBitSet(values);

	for(Cluster candidate : clusters){
		BitSet candidateFlags = CacheUtil.getValue(candidate, ClusteringModelEvaluator.clusterFlagCache);

		// NOTE(review): java.util.BitSet#size() reports the allocated capacity in bits, not the
		// number of logical flags, so this only detects mismatches that change the allocated
		// capacity - confirm whether a stricter length check is intended.
		if(candidateFlags.size() != valueFlags.size()){
			throw new InvalidElementException(candidate);
		}

		distribution.put(candidate, MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, valueFlags, candidateFlags));
	}

	return distribution;
}
 
示例9
/**
 * Checks {@code MeasureUtil.evaluateSimilarity} for several binary similarity measures,
 * using one fixed pair of four-element flag vectors.
 */
@Test
public void evaluateSimilarity(){
	List<ClusteringField> fields = createClusteringFields("one", "two", "three", "four");

	ValueFactory<?> valueFactory = MeasureUtilTest.valueFactoryFactory.newValueFactory(MathContext.DOUBLE);

	// The two vectors cover each of the four (0/1, 0/1) combinations exactly once
	BitSet actualFlags = createFlags(Arrays.asList(0, 0, 1, 1));
	BitSet expectedFlags = createFlags(Arrays.asList(0, 1, 0, 1));

	// Simple matching: two of the four positions agree
	ComparisonMeasure measure = new ComparisonMeasure(ComparisonMeasure.Kind.SIMILARITY, new SimpleMatching());
	assertEquals(valueFactory.newValue(2d / 4d), MeasureUtil.evaluateSimilarity(valueFactory, measure, fields, actualFlags, expectedFlags));

	// Jaccard: one position is set in both vectors, three positions are set in at least one
	measure.setMeasure(new Jaccard());
	assertEquals(valueFactory.newValue(1d / 3d), MeasureUtil.evaluateSimilarity(valueFactory, measure, fields, actualFlags, expectedFlags));

	// Tanimoto: the two disagreeing positions are counted twice in the denominator
	measure.setMeasure(new Tanimoto());
	assertEquals(valueFactory.newValue(2d / (1d + 2 * 2d + 1d)), MeasureUtil.evaluateSimilarity(valueFactory, measure, fields, actualFlags, expectedFlags));

	// Custom binary similarity with explicit coefficients
	measure.setMeasure(new BinarySimilarity(0.5d, 0.5d, 0.5d, 0.5d, 1d, 1d, 1d, 1d));
	assertEquals(valueFactory.newValue(2d / 4d), MeasureUtil.evaluateSimilarity(valueFactory, measure, fields, actualFlags, expectedFlags));
}