// Loads the file table CSV used by every chart below; d3.autoType is passed as
// the row-conversion function.
// The source line held both sides of a diff fused together ("-old+new"); the
// added (+) side, which points at mqc.fileTable.csv, is the one kept here.
myFileTable = d3Fetch.csv("https://hive.aws.biochemistry.gwu.edu/help/mqc.fileTable.csv", d3.autoType)
#### Sorted qA data with short reads:
Plot.plot({
  grid: true,
  marks: [
    // One dot per row of myFileTable: "FileName" on x, the " qA" column on y,
    // with the x ordering driven by the y channel (sort: {x: "y"}).
    // NOTE(review): `marks`, `style` and `text` are passed as dot options here;
    // they are presumably ignored by Plot.dot — confirm, and switch to `stroke`
    // and an axis label if red dots / a y-axis title were intended.
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qA",
      sort: {x: "y"},
      marks: "…",
      style: {color: "red"},
      text: "Quality (Phred33)",
    }),
  ],
})
#### Two alternate Plot.dot formats:
##### Quality of all nucleotides (sorted by adenine, shown in red). Adenine generally seems to have the lowest quality:
// Dot plot of the four per-base quality columns against FileName.
// Only the " qA" layer carries sort: {x: "y"}, so it is the layer that
// requests the x ordering; the other three layers are overlaid on top.
// NOTE(review): top-level `label`, `yLabel` and `legend` may not all be
// recognised by Plot.plot in the version in use — confirm they take effect.
adenine = Plot.plot({
  label: "Long read quality, sorted",
  yLabel: "Quality score (Phred33)",
  x: {ticks: null},
  y: {domain: [0, 13]},
  grid: true,
  legend: true,
  marks: [
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qA",
      stroke: "red",
      opacity: 0.25,
      sort: {x: "y"},
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qC",
      stroke: "yellow",
      opacity: 0.3,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qT",
      stroke: "blue",
      opacity: 0.2,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qG",
      stroke: "green",
      opacity: 0.1,
    }),
  ],
})
##### Cytosine:
// Same four-layer dot plot as the other per-base cells, but here the " qC"
// layer (drawn first, in orange) is the one carrying sort: {x: "y"}; the
// remaining layers are drawn fainter on top of it.
// NOTE(review): top-level `label`, `yLabel` and `legend` may not all be
// recognised by Plot.plot in the version in use — confirm they take effect.
cytosine = Plot.plot({
  label: "Long read quality, sorted",
  yLabel: "Quality score (Phred33)",
  x: {ticks: null},
  y: {domain: [0, 13]},
  grid: true,
  legend: true,
  marks: [
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qC",
      stroke: "orange",
      opacity: 0.3,
      sort: {x: "y"},
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qA",
      stroke: "red",
      opacity: 0.07,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qT",
      stroke: "blue",
      opacity: 0.15,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qG",
      stroke: "green",
      opacity: 0.07,
    }),
  ],
})
##### Thymine:
// Four-layer dot plot of per-base quality against FileName; the " qT" layer
// (third, in blue) is the one carrying sort: {x: "y"}.
// NOTE(review): top-level `label`, `yLabel` and `legend` may not all be
// recognised by Plot.plot in the version in use — confirm they take effect.
thymine = Plot.plot({
  label: "Long read quality, sorted",
  yLabel: "Quality score (Phred33)",
  x: {ticks: null},
  y: {domain: [0, 13]},
  grid: true,
  legend: true,
  marks: [
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qC",
      stroke: "yellow",
      opacity: 0.3,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qA",
      stroke: "red",
      opacity: 0.1,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qT",
      stroke: "blue",
      opacity: 0.2,
      sort: {x: "y"},
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qG",
      stroke: "green",
      opacity: 0.1,
    }),
  ],
})
##### Guanine:
// Four-layer dot plot of per-base quality against FileName; the " qG" layer
// (last, in green) is the one carrying sort: {x: "y"}.
// NOTE(review): top-level `label`, `yLabel` and `legend` may not all be
// recognised by Plot.plot in the version in use — confirm they take effect.
guanine = Plot.plot({
  label: "Long read quality, sorted",
  yLabel: "Quality score (Phred33)",
  x: {ticks: null},
  y: {domain: [0, 13]},
  grid: true,
  legend: true,
  marks: [
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qC",
      stroke: "yellow",
      opacity: 0.3,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qA",
      stroke: "red",
      opacity: 0.1,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qT",
      stroke: "blue",
      opacity: 0.2,
    }),
    Plot.dot(myFileTable, {
      x: "FileName",
      y: " qG",
      stroke: "green",
      opacity: 0.2,
      sort: {x: "y"},
    }),
  ],
})
##### Histogram:
// Histogram over the " A_fr" column of myFileTable.
// The source line held both sides of a diff fused together; this is the
// added (+) side: `value` reads " A_fr" and the label is "Sample". The
// commented-out alternatives that were unchanged context are kept, each on
// its own line so that a `//` comment no longer swallows the options after it.
chartA = Histogram(myFileTable, {
  //width,
  value: d => d[" A_fr"],
  height: 500,
  width,
  label: "Sample",
  //value: d => d.FileName,
  //thresholds: 100,
  //domain: [0, 1],
  color: "steelblue"
  //height: 240
})
##### No relationship between adenine frequency and quality:
Short reads:
// Scatterplot of " A_fr" (x) against " qA" (y), one point per row, titled
// with the row's FileName.
// The source line held both sides of a diff fused together; this is the
// added (+) side: the cell is named chartB, the fixed yDomain is dropped, and
// the chart uses the notebook's `width` with a height of 600.
chartB = Scatterplot(myFileTable, {
  x: d => d[" A_fr"],
  y: d => d[" qA"],
  title: d => d.FileName,
  xLabel: "A Frequency →",
  yLabel: "↑ A Quality",
  stroke: "steelblue",
  width,
  height: 600
})
Long reads:
// Scatterplot of " A_fr" (x) against " qA" (y) with the y axis fixed to
// [0, 15]; each point is titled with the row's FileName.
longReads = Scatterplot(myFileTable, {
  x: (row) => row[" A_fr"],
  y: (row) => row[" qA"],
  title: (row) => row.FileName,
  xLabel: "A Frequency →",
  yLabel: "↑ A Quality",
  yDomain: [0, 15],
  stroke: "steelblue",
  width: 450,
  height: 450,
})
<hr> <hr>
#### Attempt to add regression line:
// Scatter of " A_fr" (x) against " qA" (y) with a fitted regression line.
// The original joined Scatterplot(...) and linearRegression(...) with a comma,
// so the two were never composed into one chart. linearRegression returns a
// Plot.link mark, so both layers are drawn here as marks of a single Plot.plot,
// keeping the same size, axis labels, y domain and stroke as before.
longReadsB = Plot.plot({
  width: 450,
  height: 450,
  x: {label: "A Frequency →"},
  y: {label: "↑ A Quality", domain: [0, 15]},
  marks: [
    Plot.dot(myFileTable, {
      x: d => d[" A_fr"],
      y: d => d[" qA"],
      title: d => d.FileName,
      stroke: "steelblue"
    }),
    linearRegression(myFileTable, {x: " A_fr", y: " qA"})
  ]
})
import {LineChart} from "@d3/multi-line-chart"
/**
 * Builds a Plot.link mark whose endpoints are taken from a straight line
 * fitted (via linearRegressionLine) to the x/y values of `data`.
 *
 * One line is fitted per facet, and within a facet per distinct `z` value
 * when a `z` (or a non-constant `stroke`) is supplied.
 *
 * @param data - The rows passed through to Plot.link and Plot.valueof.
 * @param options - Mark options; `x`, `y`, `z` and `stroke` are read here, and
 *   the whole object is also handed to Plot.transform.
 * @returns The value returned by Plot.link.
 */
function linearRegression(data, options = {}) {
  let {stroke, x, y, z} = options;
  // maybeColor returns [channel, constant]; only the channel half is used below.
  // NOTE(review): cstroke is never read in this function.
  let [vstroke, cstroke] = maybeColor(stroke, "currentColor");
  // A non-constant stroke doubles as the grouping key when no z was given.
  if (z === undefined && vstroke != null) z = vstroke;
  // Endpoint arrays: created empty here, filled in by the transform callback
  // below, and passed (as the same array objects) as x1/y1/x2/y2.
  // Presumably Plot runs the transform before it reads these — confirm.
  const X1 = [];
  const Y1 = [];
  const X2 = [];
  const Y2 = [];
  // NOTE(review): S is never read in this function.
  const S = vstroke ? [] : undefined;
  return Plot.link(data, {
    ...Plot.transform(options, (data, facets) => {
      const X = Plot.valueof(data, x);
      const Y = Plot.valueof(data, y);
      const Z = z !== undefined ? Plot.valueof(data, z) : undefined;
      // Every fitted line spans the x extent of ALL rows, not just its own group.
      const [x1, x2] = d3.extent(X);
      const regressionFacets = [];
      for (const facet of facets) {
        // Keep only row indices where both x and y are non-null and not NaN.
        let F = facet.filter(i => defined(X[i]) && defined(Y[i]));
        const regressionFacet = [];
        // Without Z the whole filtered facet is treated as a single group.
        for (const I of Z ? d3.group(F, i => Z[i]).values() : [F]) {
          const f = linearRegressionLine(I, X, Y);
          // The group's first row index stands in for the whole group: the
          // endpoints are stored at that index and it is the only index
          // emitted for the group in the output facet.
          const i = I[0];
          X1[i] = x1;
          X2[i] = x2;
          Y1[i] = f(x1);
          Y2[i] = f(x2);
          regressionFacet.push(i);
        }
        regressionFacets.push(regressionFacet);
      }
      return {data, facets: regressionFacets};
    }),
    x1: X1,
    y1: Y1,
    x2: X2,
    y2: Y2
  });
}
/**
 * Fits an ordinary least-squares line y = b + m·x to the points selected by
 * the index list `I` and returns it as a function of x.
 *
 * Degenerate inputs:
 * - one point: the returned function always yields that point's y;
 * - several points that all share the same x (slope undefined, the original
 *   produced NaN from 0/0): the returned function always yields the mean y,
 *   matching the flat-line convention already used for a single point;
 * - no points: the returned function yields NaN.
 *
 * @param {Iterable<number> & {length: number}} I - Indices into X and Y.
 * @param {ArrayLike<number>} X - x values, addressed by index.
 * @param {ArrayLike<number>} Y - y values, addressed by index.
 * @returns {(x: number) => number} The fitted line.
 */
function linearRegressionLine(I, X, Y) {
  const n = I.length;
  if (n === 1) return () => Y[I[0]];

  const firstX = X[I[0]];
  let hasSingleX = true;
  let sx = 0;
  let sy = 0;
  let sxx = 0;
  let sxy = 0;
  for (const i of I) {
    const x = X[i];
    const y = Y[i];
    if (x !== firstX) hasSingleX = false;
    sx += x;
    sy += y;
    sxx += x * x;
    sxy += x * y;
  }

  const denominator = n * sxx - sx * sx;
  // Comparing the x values directly catches the zero-variance case even when
  // rounding leaves the denominator at a tiny non-zero value.
  if (n > 1 && (hasSingleX || denominator === 0)) {
    const meanY = sy / n;
    return () => meanY;
  }

  const m = (n * sxy - sx * sy) / denominator;
  const b = (sy - m * sx) / n;
  return (x) => b + m * x;
}
/**
 * Splits a color-like option into a two-element array: the first slot is used
 * when the value is not a literal color, the second when it is.
 *
 * - `undefined` is replaced by `defaultValue` first;
 * - `null` gives `[undefined, "none"]`;
 * - a value accepted by isColor gives `[undefined, value]`;
 * - anything else gives `[value, undefined]`.
 *
 * @param {*} value - The option as supplied by the caller.
 * @param {*} defaultValue - Used when `value` is `undefined`.
 * @returns {[*, *]} The two-slot array described above.
 */
function maybeColor(value, defaultValue) {
  const resolved = value === undefined ? defaultValue : value;
  if (resolved === null) {
    return [undefined, "none"];
  }
  if (isColor(resolved)) {
    return [undefined, resolved];
  }
  return [resolved, undefined];
}
/**
 * Reports whether `value` is a string naming a literal paint.
 *
 * The string is lower-cased and trimmed, then accepted when it is "none",
 * "currentcolor", a `url(...)` reference, a `var(...)` reference, or anything
 * for which d3.color returns a non-null result. Non-strings are rejected.
 *
 * The explanatory notes are block comments on their own lines: as trailing
 * `//` comments on a single collapsed line they commented out the remaining
 * branches and the closing brace.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when `value` is treated as a literal color.
 */
function isColor(value) {
  if (typeof value !== "string") return false;
  const normalized = value.toLowerCase().trim();
  return (
    normalized === "none" ||
    normalized === "currentcolor" ||
    /* <funciri>, e.g. pattern or gradient */
    (normalized.startsWith("url(") && normalized.endsWith(")")) ||
    /* CSS variable */
    (normalized.startsWith("var(") && normalized.endsWith(")")) ||
    d3.color(normalized) !== null
  );
}
/**
 * Tells whether `x` carries a usable value: anything other than `null`,
 * `undefined` or the number NaN.
 *
 * @param {*} x - Value to check.
 * @returns {boolean} False for null, undefined and NaN; true otherwise.
 */
function defined(x) {
  if (x == null) {
    return false;
  }
  return !Number.isNaN(x);
}