在JavaScript中对数组进行分组以制作直方图

11

我在Javascript中有一个数组,需要将其分成20个桶。数据值介于0和1之间,因此桶大小为0.05。我觉得应该有一个函数可以接受两个参数:一个是数组,另一个是桶大小,但我找不到这样的函数。我知道D3.js有一些功能可以帮助构建这样的数组,但我无法确定哪个函数可能有帮助。

// Sample data: 100 readings in the range [0, 1], stored as numeric strings.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
]

如果我能弄清楚这句话的意思,我就可以创造奇迹了,但是...“数据值介于0和1之间,因此箱子大小为0.05。” - Redu
请检查数组,所有的值都在0到1的范围内,换句话说,没有低于0或高于1的值。 - NodeJS_dev
是的,我注意到了,但是什么是bin size,什么是bucket? - Redu
1
好的,bins是数据组,类间隔,如果你愿意这么说。Bucket是每个数据组的常见统计短语。 - NodeJS_dev
4个回答

15
随着 D3.js v6 的发布,d3.layout.histogram 已被 d3.bin() 取代,后者现在属于 d3-array 模块。
要将数组分组,您需要创建一个直方图生成器:
// Build a bin generator for values in [0, 1]; each setter returns the
// generator itself, so configuring it step by step is equivalent to chaining.
var histGenerator = d3.bin();
histGenerator.domain([0, 1]);  // cover the entire interval [0, 1]
histGenerator.thresholds(19);  // 19 thresholds split the domain into 19 + 1 = 20 bins

还有更多选项可用于配置阈值(从而配置分组),不过上面的生成器已经完全符合您的需求。将数值数组传给该生成器,即可得到计算好的分组结果:
// Call the generator with the array of values to get back the computed bins.
var bins = histGenerator(arr);

看看这个可运行的示例:

// Sample data: 100 readings in the range [0, 1], stored as numeric strings.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

// Configure a bin generator for the sample data.
var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire interval [0, 1]
  .thresholds(19);  // Number of thresholds; this will create 19 + 1 = 20 bins

// Bin the values and print the resulting array of bins.
var bins = histGenerator(arr);
console.log(bins);
<script src="https://d3js.org/d3.v6.js"></script>


我正在尝试在 node.js 上运行这个解决方案,但迄今为止没有成功。似乎加载 d3 模块存在问题。这很令人沮丧,因为这个解决方案看起来完美无缺。 - Emman

7

您可以用一些 JS 代码自己创建分箱(bins):

// Sample data: 100 readings in the range [0, 1], stored as numeric strings.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

var bins = [];
var binCount = 0;      // after setup this holds the total number of bins
var interval = .05;    // width of each bin
var numOfBuckets = 1;  // NOTE: despite its name, this is the upper bound of the value range

// Setup bins.
// The number of bins is derived once as an integer and each edge is computed
// as index * interval. Repeatedly adding `interval` to a float loop counter
// accumulates rounding error (edges like 0.15000000000000002 and a last edge
// of 1.0000000000000002) and makes the bin count depend on that error.
// The small tolerance keeps a ratio such as 20.000000000000004 from rounding up to 21.
var totalBins = Math.ceil(numOfBuckets / interval - 1e-9);
for (binCount = 0; binCount < totalBins; binCount++) {
  bins.push({
    binNum: binCount,
    // toPrecision(12) strips floating-point noise so the edges read 0.15, 0.2, ...
    minNum: +(binCount * interval).toPrecision(12),
    maxNum: +((binCount + 1) * interval).toPrecision(12),
    count: 0
  });
}

/**
 * Finds the bin a value belongs to.
 *
 * Bins are half-open intervals (minNum, maxNum], except the first bin, which
 * also includes its lower edge so that a value equal to the range minimum
 * (0 here) is counted instead of being silently dropped.
 *
 * @param {number} value - The numeric value to classify.
 * @returns {number} Index into `bins`, or -1 if the value is NaN or out of range.
 */
function findBinIndex(value) {
  for (var j = 0; j < bins.length; j++) {
    var bin = bins[j];
    var aboveLowerEdge = j === 0 ? value >= bin.minNum : value > bin.minNum;
    if (aboveLowerEdge && value <= bin.maxNum) {
      return j;  // A value can only be in one bin.
    }
  }
  return -1;
}

// Loop through data and add to bin's count.
for (var i = 0; i < arr.length; i++) {
  // Convert explicitly: the data are numeric strings. parseFloat yields NaN for
  // empty or non-numeric entries, which then match no bin and are skipped.
  var item = parseFloat(arr[i]);
  var binIndex = findBinIndex(item);
  if (binIndex !== -1) {
    bins[binIndex].count++;
  }
}

https://jsbin.com/keropoyadu/edit?js,output


6
d3js库有一个d3.layout.histogram()函数,返回一个直方图布局对象,用于将数据分组到bins中。布局对象既是对象又是函数。您可以在布局对象上调用方法来设置所需的布局行为。然后,您可以调用布局对象将数据分组成一组bin的数组。每个bin是一个值的数组。每个bin还具有x、dx、y的附加属性。
例如,以下代码将把数据分组为20个覆盖从0到1范围的bin。
// Sample data: 100 readings in the range [0, 1], stored as numeric strings.
var arr = ["0.362743", "0.357969", "0.356322", "0.355757", "0.358511", "0.357218", "0.356696", "0.354579", "0.828295", "0.391186", "0.378577", "0.39372", "0.396416", "0.395641", "0.37573", "0.379666", "0.377443", "0.391842", "0.402021", "0.377516", "0.38936", "0.38936", "0.400883", "0.393171", "0.374419", "0.400821", "0.380502", "0.396098", "0.388256", "0.398968", "0.392525", "0.401858", "0.387297", "0.376471", "0.378183", "0.379787", "0.382024", "0.387928", "0.395367", "0.391972", "0.381295", "0.391183", "0.383598", "0.386424", "0.384338", "0.401834", "0.406253", "0.392854", "0.399266", "0.400804", "0.391146", "0.395441", "0.396265", "0.397894", "0.384822", "0.385181", "0.395443", "0.400981", "0.401716", "0.406633", "0.406887", "0.40694", "0.391219", "0.387946", "0.398858", "0.402233", "0.388583", "0.389772", "0.397084", "0.711566", "0.954557", "0.524007", "0.672288", "0.668441", "0.421726", "0.549536", "0.932952", "0.397851", "0.395536", "0.354818", "0.374355", "0.375257", "0.362613", "0.391271", "0.379219", "0.363316", "0.866006", "0.862254", "0.864403", "0.861346", "0.845225", "0.784467", "0.801275", "0.638579", "0.847282", "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"];
// Build a histogram layout, configure it, and immediately invoke it on the data.
var bins = d3.layout.histogram()  // create the layout object (it is also callable)
    .bins(20)       // request 20 bins
    .range([0, 1])  // cover the range from 0 to 1
    (arr);          // call the layout to group the data into the bins

代码运行后...
bins[i] is an array of values in the ith bin
bins[i].x is the lower bounds of the ith bin
bins[i].dx is the width of the ith bin
bins[i].x + bins[i].dx is the upper bounds of the ith bin
bins[i].y is the number of values in the ith bin

直方图布局对象的文档在...

https://github.com/d3/d3/wiki/Histogram-Layout

注意:默认情况下,布局对象将字符串值转换为数字值。因此,布局函数可以处理你的字符串值。


2
您想要的功能是直方图布局。您可以像这样操作:
// Group `arr` into 20 bins using the histogram layout.
// NOTE(review): no .range() is set here, so the layout's default range applies —
// presumably the extent of the data per the D3 v3 docs; add .range([0, 1]) to
// span the full [0, 1] interval. Confirm against the documentation.
var data = d3.layout.histogram()
    .bins(20)
    (arr);

这只是一个常见的示例,您需要调整数值。请查阅文档:https://github.com/d3/d3/wiki/Histogram-Layout


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接