Published unlisted
Edited
May 16, 2022
Insert cell
# Sample Data Viz from HIVE
Insert cell
#### Import data:
Insert cell
Changed in parent
-
myFileTable = d3Fetch.csv("https://hive.aws.biochemistry.gwu.edu/help/fileTable.csv", d3.autoType)
+
myFileTable = d3Fetch.csv("https://hive.aws.biochemistry.gwu.edu/help/mqc.fileTable.csv", d3.autoType)
Insert cell
#### Check that the data imported as a CSV table:
Insert cell
// Show the rows loaded into myFileTable as a table so the CSV import can be inspected by eye.
viewof table = Inputs.table(myFileTable)
Insert cell
//data = FileAttachment(myFileTable).csv({typed: true})
Insert cell
#### Visualize:
Insert cell
// Quick scatter of the " A_fr" column against the " qA" column.
// NOTE(review): both keys start with a space, presumably mirroring the CSV header — confirm against the file.
Plot.dot(myFileTable, {x: " A_fr", y: " qA"}).plot()
Insert cell
Added in parent
#### Sorted qA data with short reads:
Insert cell
Added in parent
// One dot per file: " qA" on y, with the x (FileName) domain ordered by that y value.
// The dot colour is set with `stroke` and the axis title with the y scale's `label`;
// the previous `marks`, `style` and `text` keys are not dot-mark options and had no effect.
Plot.plot({
  grid: true,
  y: {
    label: "Quality (Phred33)"
  },
  marks: [
    Plot.dot(myFileTable, {x: "FileName", y: " qA", sort: {x: "y"}, stroke: "red"})
  ]
})
Insert cell
Added in parent
#### Two alternate Plot.dot formats:
Insert cell
// " A_fr" against " qA" again, this time as an explicit mark list handed to Plot.plot
// instead of calling .plot() on the dot mark itself.
Plot.plot({
  marks: [
    Plot.dot(myFileTable, {y: " qA", x: " A_fr"})
  ],
  grid: true
})
Insert cell
Added in parent
##### Quality of all nucleotides (sorted by adenine, red). Adenine seems to generally be the lowest quality:
Insert cell
Added in parent
// Quality columns for all four bases plotted per file. The x (FileName) domain is
// ordered by the " qA" values through the `sort` option on the first (red) mark.
adenine = Plot.plot({
  label: "Long read quality, sorted",
  x: {
    ticks: null
  },
  y: {
    domain: [0, 13],
    // The axis title belongs on the y scale; a top-level `yLabel` key is not a Plot option.
    label: "Quality score (Phred33)"
  },
  grid: true,
  // NOTE(review): every stroke below is a literal colour, so there may be no colour scale for a legend to describe — confirm.
  legend: true,
  marks: [
    Plot.dot(myFileTable, {x: "FileName", y: " qA", stroke: "red", opacity: 0.25, sort: {x: "y"}}),
    Plot.dot(myFileTable, {x: "FileName", y: " qC", stroke: "yellow", opacity: 0.3}),
    Plot.dot(myFileTable, {x: "FileName", y: " qT", stroke: "blue", opacity: 0.2}),
    Plot.dot(myFileTable, {x: "FileName", y: " qG", stroke: "green", opacity: 0.1})
  ]
})
Insert cell
Added in parent
##### Cytosine:
Insert cell
Added in parent
// Quality columns for all four bases plotted per file. The x (FileName) domain is
// ordered by the " qC" values through the `sort` option on the first (orange) mark.
cytosine = Plot.plot({
  label: "Long read quality, sorted",
  x: {
    ticks: null
  },
  y: {
    domain: [0, 13],
    // The axis title belongs on the y scale; a top-level `yLabel` key is not a Plot option.
    label: "Quality score (Phred33)"
  },
  grid: true,
  // NOTE(review): every stroke below is a literal colour, so there may be no colour scale for a legend to describe — confirm.
  legend: true,
  marks: [
    Plot.dot(myFileTable, {x: "FileName", y: " qC", stroke: "orange", opacity: 0.3, sort: {x: "y"}}),
    Plot.dot(myFileTable, {x: "FileName", y: " qA", stroke: "red", opacity: 0.07}),
    Plot.dot(myFileTable, {x: "FileName", y: " qT", stroke: "blue", opacity: 0.15}),
    Plot.dot(myFileTable, {x: "FileName", y: " qG", stroke: "green", opacity: 0.07})
  ]
})
Insert cell
Added in parent
##### Thymine:
Insert cell
Added in parent
// Quality columns for all four bases plotted per file. The x (FileName) domain is
// ordered by the " qT" values through the `sort` option on the blue mark.
thymine = Plot.plot({
  label: "Long read quality, sorted",
  x: {
    ticks: null
  },
  y: {
    domain: [0, 13],
    // The axis title belongs on the y scale; a top-level `yLabel` key is not a Plot option.
    label: "Quality score (Phred33)"
  },
  grid: true,
  // NOTE(review): every stroke below is a literal colour, so there may be no colour scale for a legend to describe — confirm.
  legend: true,
  marks: [
    Plot.dot(myFileTable, {x: "FileName", y: " qC", stroke: "yellow", opacity: 0.3}),
    Plot.dot(myFileTable, {x: "FileName", y: " qA", stroke: "red", opacity: 0.1}),
    Plot.dot(myFileTable, {x: "FileName", y: " qT", stroke: "blue", opacity: 0.2, sort: {x: "y"}}),
    Plot.dot(myFileTable, {x: "FileName", y: " qG", stroke: "green", opacity: 0.1})
  ]
})
Insert cell
Added in parent
##### Guanine:
Insert cell
Added in parent
// Quality columns for all four bases plotted per file. The x (FileName) domain is
// ordered by the " qG" values through the `sort` option on the green mark.
guanine = Plot.plot({
  label: "Long read quality, sorted",
  x: {
    ticks: null
  },
  y: {
    domain: [0, 13],
    // The axis title belongs on the y scale; a top-level `yLabel` key is not a Plot option.
    label: "Quality score (Phred33)"
  },
  grid: true,
  // NOTE(review): every stroke below is a literal colour, so there may be no colour scale for a legend to describe — confirm.
  legend: true,
  marks: [
    Plot.dot(myFileTable, {x: "FileName", y: " qC", stroke: "yellow", opacity: 0.3}),
    Plot.dot(myFileTable, {x: "FileName", y: " qA", stroke: "red", opacity: 0.1}),
    Plot.dot(myFileTable, {x: "FileName", y: " qT", stroke: "blue", opacity: 0.2}),
    Plot.dot(myFileTable, {x: "FileName", y: " qG", stroke: "green", opacity: 0.2, sort: {x: "y"}})
  ]
})
Insert cell
Added in parent
##### Histogram:
Insert cell
Changed in parent
chartA = Histogram(myFileTable, { //width,
-
//value: d => d[" A_fr"], value: d => d[" C_fr"], //value: d => d[" T_fr"], //value: d => d[" G_fr"],
+
value: d => d[" A_fr"],
height: 500, width,
-
label: "Frequency of cytosine in sample",
+
label: "Sample",
//value: d => d.FileName, //thresholds: 100, //domain: [0, 1], color: "steelblue" //height: 240 })
Insert cell
Added in parent
##### No relationship between adenine frequency and quality:
Insert cell
Added in parent
Short reads:
Insert cell
Changed in parent
-
shortReads = Scatterplot(myFileTable, {
+
chartB = Scatterplot(myFileTable, {
x: d => d[" A_fr"], y: d => d[" qA"], title: d => d.FileName, xLabel: "A Frequency →", yLabel: "↑ A Quality",
-
yDomain: [26,40],
stroke: "steelblue",
-
width: 300, height: 300
+
width, height: 600
})
Insert cell
Added in parent
Long reads:
Insert cell
Added in parent
// Scatterplot of the " A_fr" column (x) against the " qA" column (y), built with the
// Scatterplot helper imported from "@d3/scatterplot".
longReads = Scatterplot(myFileTable, {
  x: d => d[" A_fr"],
  y: d => d[" qA"],
  // Each point's title is its row's FileName.
  title: d => d.FileName,
  xLabel: "A Frequency →",
  yLabel: "↑ A Quality",
  // NOTE(review): the 0–15 range presumably matches the long-read quality scores — confirm against the data.
  yDomain: [0,15],
  stroke: "steelblue",
  width: 450,
  height: 450
})
Insert cell
Added in parent
<hr>
<hr>
Insert cell
# Appendix
Insert cell
Added in parent
#### Attempt to add regression line:
Insert cell
Added in parent
// Scatter of " A_fr" against " qA" with a fitted regression line drawn on top.
// linearRegression() returns a Plot link mark, so it has to sit in the same Plot.plot
// mark list as the dots. Joining it to a Scatterplot(...) call with a comma only
// evaluated the two expressions side by side and never overlaid the line.
longReadsB = Plot.plot({
  width: 450,
  height: 450,
  x: {
    label: "A Frequency →"
  },
  y: {
    label: "↑ A Quality",
    domain: [0, 15]
  },
  marks: [
    Plot.dot(myFileTable, {x: " A_fr", y: " qA", stroke: "steelblue", title: "FileName"}),
    linearRegression(myFileTable, {x: " A_fr", y: " qA"})
  ]
})
Insert cell
import {Scatterplot} from "@d3/scatterplot"
Insert cell
import {Histogram} from "@d3/histogram"
Insert cell
Added in parent
import {LineChart} from "@d3/multi-line-chart"
Insert cell
d3Fetch = require('d3-fetch')
Insert cell
Added in parent
/**
 * Builds a Plot link mark that draws one least-squares regression segment per
 * facet (and per z / stroke-channel group inside each facet).
 *
 * The segment endpoints are written into shared arrays by the transform, at the
 * index of the first datum of each group; that index is also what the transform
 * returns in the facet, so the link mark draws exactly one segment per group.
 *
 * @param {Iterable} data - Rows passed to Plot.link and Plot.valueof.
 * @param {object} [options] - Mark options; `x` and `y` select the regressed
 *   channels, `z` (or a non-constant `stroke`) selects the grouping channel.
 *   All options are forwarded to the link mark.
 * @returns {object} The value returned by Plot.link.
 */
function linearRegression(data, options = {}) {
  const {stroke, x, y} = options;
  let {z} = options;
  // Only the channel half of the stroke matters here: a constant colour is
  // forwarded to the link mark untouched through `options`.
  const [vstroke] = maybeColor(stroke, "currentColor");
  if (z === undefined && vstroke != null) z = vstroke;
  // Endpoint channels, filled lazily when the transform runs.
  const X1 = [];
  const Y1 = [];
  const X2 = [];
  const Y2 = [];
  return Plot.link(data, {
    ...Plot.transform(options, (rows, facets) => {
      const X = Plot.valueof(rows, x);
      const Y = Plot.valueof(rows, y);
      const Z = z !== undefined ? Plot.valueof(rows, z) : undefined;
      // Every segment spans the full x extent of the data.
      const [x1, x2] = d3.extent(X);
      const regressionFacets = [];
      for (const facet of facets) {
        const F = facet.filter(i => defined(X[i]) && defined(Y[i]));
        const regressionFacet = [];
        for (const I of Z ? d3.group(F, i => Z[i]).values() : [F]) {
          // A facet with no usable points has nothing to fit; without this guard
          // `I[0]` is undefined and would be pushed as a bogus mark index.
          if (I.length === 0) continue;
          const f = linearRegressionLine(I, X, Y);
          const i = I[0];
          X1[i] = x1;
          X2[i] = x2;
          Y1[i] = f(x1);
          Y2[i] = f(x2);
          regressionFacet.push(i);
        }
        regressionFacets.push(regressionFacet);
      }
      return {data: rows, facets: regressionFacets};
    }),
    x1: X1,
    y1: Y1,
    x2: X2,
    y2: Y2
  });
}
Insert cell
Added in parent
/**
 * Fits an ordinary least-squares line y = b + m·x through the points selected
 * by the index list `I` and returns it as a function of x.
 *
 * Degenerate inputs:
 * - one point: a constant function returning that point's y;
 * - several points sharing a single x value: the slope is undefined (the
 *   normal-equation denominator is 0), so a horizontal line at the mean y is
 *   returned instead of a function that yields NaN;
 * - no points: the returned function yields NaN.
 *
 * @param {ArrayLike<number>} I - Indexes into X and Y to include in the fit.
 * @param {ArrayLike<number>} X - x values, addressed by index.
 * @param {ArrayLike<number>} Y - y values, addressed by index.
 * @returns {function(number): number} The fitted line.
 */
function linearRegressionLine(I, X, Y) {
  const n = I.length;
  if (n === 1) return () => Y[I[0]];
  let sx = 0;
  let sy = 0;
  let sxx = 0;
  let sxy = 0;
  let xMin = Infinity;
  let xMax = -Infinity;
  for (const i of I) {
    const x = X[i];
    const y = Y[i];
    sx += x;
    sy += y;
    sxx += x * x;
    sxy += x * y;
    if (x < xMin) xMin = x;
    if (x > xMax) xMax = x;
  }
  // Compare the extremes rather than testing the denominator against 0:
  // floating-point rounding can leave the denominator slightly off zero.
  if (n > 1 && xMin === xMax) {
    const meanY = sy / n;
    return () => meanY;
  }
  const m = (n * sxy - sx * sy) / (n * sxx - sx * sx);
  const b = (sy - m * sx) / n;
  return x => b + m * x;
}
Insert cell
Added in parent
/**
 * Splits a stroke/fill option into a [channel, constant] pair.
 *
 * `undefined` is replaced by `defaultValue` first. Then:
 * - `null` gives `[undefined, "none"]`;
 * - a value that isColor() accepts gives `[undefined, value]`;
 * - anything else is treated as a channel and gives `[value, undefined]`.
 *
 * @param {*} value - The option as supplied by the caller.
 * @param {*} defaultValue - Used when `value` is `undefined`.
 * @returns {Array} A two-element array: channel first, constant second.
 */
function maybeColor(value, defaultValue) {
  const candidate = value === undefined ? defaultValue : value;
  if (candidate === null) {
    return [undefined, "none"];
  }
  if (isColor(candidate)) {
    return [undefined, candidate];
  }
  return [candidate, undefined];
}
Insert cell
Added in parent
/**
 * Reports whether `value` is a string naming a colour rather than a data field.
 *
 * The string is lower-cased and trimmed, then accepted when it is "none",
 * "currentcolor", a `url(...)` reference (<funciri>, e.g. pattern or gradient),
 * a `var(...)` CSS variable, or anything d3.color can parse.
 *
 * @param {*} value - Candidate value; non-strings are never colours.
 * @returns {boolean} True when the value names a colour.
 */
function isColor(value) {
  if (typeof value !== "string") {
    return false;
  }
  const normalized = value.toLowerCase().trim();
  if (normalized === "none" || normalized === "currentcolor") {
    return true;
  }
  // Functional notations only count when the closing parenthesis is present.
  const isWrappedBy = (prefix) => normalized.startsWith(prefix) && normalized.endsWith(")");
  if (isWrappedBy("url(") || isWrappedBy("var(")) {
    return true;
  }
  return d3.color(normalized) !== null;
}
Insert cell
Added in parent
/**
 * Tells whether a value can be used as a data point.
 *
 * @param {*} x - Value to test.
 * @returns {boolean} False for null, undefined and NaN; true for everything else.
 */
function defined(x) {
  if (x == null) {
    return false;
  }
  return !Number.isNaN(x);
}
Insert cell