diff --git a/Cargo.lock b/Cargo.lock
index d84574f0..a6a4339e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5423,6 +5423,7 @@ dependencies = [
"pyo3",
"pyo3-log",
"rand",
+ "rayon",
"sequila-core",
"tokio",
"tracing",
diff --git a/Cargo.toml b/Cargo.toml
index 47d326b7..220c08be 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,7 @@ datafusion-python = { git = "https://github.com/apache/datafusion-python.git", r
pyo3 = { version = "0.22"}
pyo3-log = "0.11.0"
sequila-core = { git = "https://github.com/biodatageeks/sequila-native.git", rev = "43453dca320cc25a02f440850e3b947b813785c3" }
+rayon = "1.8"
datafusion = { version = "43.0.0"}
arrow = "53.3.0"
diff --git a/Zapis_genomow_decoded.md b/Zapis_genomow_decoded.md
new file mode 100644
index 00000000..87e82aed
--- /dev/null
+++ b/Zapis_genomow_decoded.md
@@ -0,0 +1,21 @@
+# Jak to działa?
+
+Na przykładzie poniższego rekordu FASTQ (jeden kompletny odczyt):
+```` py
+
+@SRR9130495.1 D00236:723:HG32CBCX2:1:1108:1330:1935/1
+NCAATACAAAAGCAATATGGGAGAAGCTACCTACCATGCTTAAAAACGCCAATGAGCAGNGATTTGTCANCNNNNNNNNCNNNNNNNNTNNTANNANNCTC
++
+#4BDFDFFHGHGGJJJHIIIIGGIIJGJJGIIIIBHIJJJIIJIJJIJDHIGGGIJJJI#-@AEHGEFF#,########,########+##++##+##+2<
+
+````
+
+## Zgodnie z linijkami kolejno
+
+1. `@identyfikator sekwencji` – znak `@` („małpa”) oznacza początek rekordu; dalej następuje identyfikator odczytu, w którym końcówka `.1` (tutaj `SRR9130495.1`) to liczba porządkowa odczytu
+2. sekwencja DNA, czyli kolejne nukleotydy (A, C, G, T), gdzie `N` oznacza niepewny odczyt
+3. `+` – separator, po którym następuje linia z jakością odczytu
+4. jakość odczytu (Phred score) dla każdego nukleotydu z linii 2, zapisana w postaci znaków ASCII
+
+W tabeli wynikowej jeden wiersz odpowiada jednej sekwencji
+
diff --git a/benchmark/src/base_seq_quality.py b/benchmark/src/base_seq_quality.py
new file mode 100644
index 00000000..bd62b526
--- /dev/null
+++ b/benchmark/src/base_seq_quality.py
@@ -0,0 +1,12 @@
+import polars_bio as pb
+import pandas
+
+path = "example.fastq"  # input FASTQ file name, given as a relative path
+# Demo script: print a preview of the FASTQ file, then a preview of its base sequence quality table.
+print('Odczytan zawartość pliku', path)
+fastq = pb.read_fastq(path).collect().head()  # keep only the leading rows; presumably .collect() materializes a lazy result -- TODO confirm
+print(fastq)
+# Second step: compute base sequence quality for the same file and show the first rows of the result.
+print('Base sequence quality dla pliku', path)
+test = pb.cacl_base_seq_quality(path, target_partitions=4, output_type='pandas.DataFrame')  # NOTE(review): target_partitions presumably sets the degree of parallelism -- confirm; result is requested as a pandas DataFrame
+print(test.head())
\ No newline at end of file
diff --git a/benchmark/src/base_seq_quality_report.py b/benchmark/src/base_seq_quality_report.py
new file mode 100644
index 00000000..108399fa
--- /dev/null
+++ b/benchmark/src/base_seq_quality_report.py
@@ -0,0 +1,136 @@
+import sys
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+import polars_bio as pb
+
+
+def main():
+ script_dir = Path(__file__).parent
+ fastq = script_dir / "example.fastq"
+ out_html = script_dir / "report_full.html"
+
+ if not fastq.exists():
+ print(f"Nie znaleziono pliku: {fastq}", file=sys.stderr)
+ sys.exit(1)
+ print(f"Parsuję FASTQ: {fastq}")
+
+ df = pb.cacl_base_seq_quality(str(fastq), output_type='pandas.DataFrame')
+ print(df.columns)
+ print(df[['min', 'q1', 'median', 'q3', 'max']].apply(len))
+ print(f"Policzono statystyki: {df.shape[0]} pozycji")
+
+ traces = []
+
+ for _, r in df.iterrows():
+ traces.append(go.Scatter(
+ x=[r["min"], r["max"]],
+ y=[r["position"], r["position"]],
+ mode="lines",
+ line=dict(color="gray", width=2),
+ showlegend=False,
+ hoverinfo="skip"
+ ))
+
+ customdata = df[['min', 'q1', 'median', 'q3', 'max']].to_numpy()
+
+ traces.append(go.Bar(
+ x=df["q3"] - df["q1"],
+ y=df["position"],
+ base=df["q1"],
+ orientation="h",
+ marker_color="rgba(0,100,80,0.6)",
+ marker_line=dict(color="rgba(0,100,80,1)", width=2),
+ width=0.6,
+ showlegend=False,
+ customdata=customdata,
+ hovertemplate=(
+ "Position: %{y}
"
+ "Min: %{customdata[0]}
"
+ "Q1: %{customdata[1]}
"
+ "Median: %{customdata[2]}
"
+ "Q3: %{customdata[3]}
"
+ "Max: %{customdata[4]}
"
+ )
+ ))
+
+ traces.append(go.Scatter(
+ x=df["median"],
+ y=df["position"],
+ mode="markers",
+ marker=dict(
+ color="white",
+ symbol="line-ns-open",
+ size=16,
+ line=dict(width=2)
+ ),
+ showlegend=False,
+ hoverinfo="skip"
+ ))
+
+ traces.append(go.Scatter(
+ x=df["median"],
+ y=df["position"],
+ mode="lines+markers",
+ line=dict(color="red", width=2),
+ marker=dict(size=4),
+ name="Median",
+ hoverinfo="skip"
+ ))
+
+ height = max(600, df.shape[0] * 60 + 200)
+
+ fig = go.Figure(traces)
+ fig.update_yaxes(
+ autorange="reversed",
+ title="Position in read (bp)",
+ dtick=1,
+ range=[0, 100]
+ )
+ fig.update_xaxes(
+ title="Phred score",
+ dtick=2
+ )
+ fig.update_layout(
+ title="Phred Score per Base Position",
+ template="plotly_white",
+ height=height,
+ margin=dict(l=80, r=20, t=60, b=60)
+ )
+
+ plot_html = fig.to_html(full_html=False, include_plotlyjs='cdn')
+
+ table_html = df.to_html(classes="table table-striped", index=False)
+
+ html = f"""
+
+
+
| position | +min | +max | +median | +q1 | +q3 | +sample_count | +
|---|---|---|---|---|---|---|
| 0 | +2.0 | +34.0 | +33.0 | +31.00 | +34.00 | +200 | +
| 1 | +10.0 | +34.0 | +34.0 | +31.00 | +34.00 | +200 | +
| 2 | +16.0 | +34.0 | +34.0 | +31.00 | +34.00 | +200 | +
| 3 | +19.0 | +37.0 | +37.0 | +35.00 | +37.00 | +200 | +
| 4 | +16.0 | +37.0 | +37.0 | +35.00 | +37.00 | +200 | +
| 5 | +10.0 | +37.0 | +35.0 | +35.00 | +37.00 | +200 | +
| 6 | +10.0 | +37.0 | +35.0 | +35.00 | +37.00 | +200 | +
| 7 | +10.0 | +37.0 | +36.0 | +35.00 | +37.00 | +200 | +
| 8 | +27.0 | +39.0 | +39.0 | +37.00 | +39.00 | +200 | +
| 9 | +19.0 | +39.0 | +39.0 | +37.00 | +39.00 | +200 | +
| 10 | +19.0 | +39.0 | +39.0 | +37.00 | +39.00 | +200 | +
| 11 | +18.0 | +39.0 | +39.0 | +37.00 | +39.00 | +200 | +
| 12 | +19.0 | +39.0 | +39.0 | +37.00 | +39.00 | +200 | +
| 13 | +25.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 14 | +27.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 15 | +18.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 16 | +10.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 17 | +27.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 18 | +16.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 19 | +10.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 20 | +23.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 21 | +8.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 22 | +8.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 23 | +8.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 24 | +10.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 25 | +8.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 26 | +10.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 27 | +10.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 28 | +16.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 29 | +23.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 30 | +8.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 31 | +8.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 32 | +9.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 33 | +7.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 34 | +18.0 | +41.0 | +40.0 | +37.75 | +41.00 | +200 | +
| 35 | +17.0 | +41.0 | +40.0 | +38.00 | +41.00 | +200 | +
| 36 | +7.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 37 | +8.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 38 | +6.0 | +41.0 | +40.0 | +37.75 | +41.00 | +200 | +
| 39 | +6.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 40 | +14.0 | +41.0 | +40.0 | +37.75 | +40.00 | +200 | +
| 41 | +8.0 | +41.0 | +40.0 | +37.00 | +41.00 | +200 | +
| 42 | +6.0 | +41.0 | +40.0 | +37.00 | +40.00 | +200 | +
| 43 | +8.0 | +41.0 | +40.0 | +38.00 | +40.00 | +200 | +
| 44 | +6.0 | +41.0 | +40.0 | +37.00 | +41.00 | +200 | +
| 45 | +8.0 | +41.0 | +40.0 | +36.00 | +41.00 | +200 | +
| 46 | +7.0 | +41.0 | +39.5 | +37.00 | +41.00 | +200 | +
| 47 | +5.0 | +41.0 | +39.0 | +37.00 | +41.00 | +200 | +
| 48 | +7.0 | +41.0 | +40.0 | +36.00 | +41.00 | +200 | +
| 49 | +8.0 | +41.0 | +39.0 | +36.00 | +40.00 | +200 | +
| 50 | +10.0 | +41.0 | +39.0 | +35.75 | +41.00 | +200 | +
| 51 | +7.0 | +41.0 | +39.0 | +36.00 | +41.00 | +200 | +
| 52 | +7.0 | +41.0 | +39.0 | +37.00 | +40.00 | +200 | +
| 53 | +5.0 | +41.0 | +39.0 | +37.00 | +40.25 | +200 | +
| 54 | +5.0 | +41.0 | +39.5 | +36.00 | +41.00 | +200 | +
| 55 | +6.0 | +41.0 | +39.0 | +36.00 | +40.00 | +200 | +
| 56 | +5.0 | +41.0 | +39.0 | +36.00 | +40.00 | +200 | +
| 57 | +5.0 | +41.0 | +39.0 | +36.00 | +40.00 | +200 | +
| 58 | +5.0 | +41.0 | +38.0 | +35.00 | +40.00 | +200 | +
| 59 | +2.0 | +41.0 | +38.0 | +35.00 | +40.00 | +200 | +
| 60 | +7.0 | +41.0 | +38.0 | +35.00 | +40.00 | +200 | +
| 61 | +7.0 | +41.0 | +38.0 | +34.75 | +40.00 | +200 | +
| 62 | +5.0 | +41.0 | +38.0 | +34.75 | +40.00 | +200 | +
| 63 | +7.0 | +41.0 | +38.0 | +35.00 | +40.00 | +200 | +
| 64 | +5.0 | +41.0 | +37.0 | +35.00 | +40.00 | +200 | +
| 65 | +5.0 | +41.0 | +37.0 | +35.00 | +39.00 | +200 | +
| 66 | +6.0 | +41.0 | +37.0 | +35.00 | +39.00 | +200 | +
| 67 | +6.0 | +41.0 | +36.5 | +34.00 | +39.00 | +200 | +
| 68 | +7.0 | +41.0 | +36.0 | +34.00 | +39.00 | +200 | +
| 69 | +2.0 | +41.0 | +36.0 | +34.00 | +39.00 | +200 | +
| 70 | +7.0 | +41.0 | +35.0 | +34.00 | +38.00 | +200 | +
| 71 | +2.0 | +41.0 | +35.0 | +33.00 | +38.00 | +200 | +
| 72 | +2.0 | +41.0 | +35.0 | +33.00 | +37.00 | +200 | +
| 73 | +2.0 | +41.0 | +35.0 | +33.00 | +37.00 | +200 | +
| 74 | +2.0 | +41.0 | +35.0 | +32.00 | +37.00 | +200 | +
| 75 | +2.0 | +41.0 | +35.0 | +30.75 | +37.00 | +200 | +
| 76 | +2.0 | +41.0 | +35.0 | +30.75 | +37.00 | +200 | +
| 77 | +2.0 | +41.0 | +35.0 | +30.00 | +36.00 | +200 | +
| 78 | +2.0 | +41.0 | +35.0 | +31.00 | +36.00 | +200 | +
| 79 | +7.0 | +41.0 | +35.0 | +31.75 | +36.00 | +200 | +
| 80 | +2.0 | +41.0 | +35.0 | +32.00 | +36.00 | +200 | +
| 81 | +2.0 | +41.0 | +35.0 | +33.00 | +36.00 | +200 | +
| 82 | +2.0 | +41.0 | +35.0 | +33.00 | +35.25 | +200 | +
| 83 | +2.0 | +40.0 | +35.0 | +33.00 | +35.00 | +200 | +
| 84 | +2.0 | +41.0 | +35.0 | +33.00 | +35.00 | +200 | +
| 85 | +2.0 | +40.0 | +35.0 | +32.75 | +35.00 | +200 | +
| 86 | +2.0 | +40.0 | +35.0 | +33.00 | +35.00 | +200 | +
| 87 | +2.0 | +40.0 | +35.0 | +32.00 | +35.00 | +200 | +
| 88 | +5.0 | +39.0 | +35.0 | +33.00 | +35.00 | +200 | +
| 89 | +2.0 | +39.0 | +35.0 | +33.00 | +35.00 | +200 | +
| 90 | +2.0 | +39.0 | +34.5 | +33.00 | +35.00 | +200 | +
| 91 | +5.0 | +40.0 | +35.0 | +32.00 | +35.00 | +200 | +
| 92 | +5.0 | +39.0 | +35.0 | +32.00 | +35.00 | +200 | +
| 93 | +2.0 | +39.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 94 | +2.0 | +40.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 95 | +7.0 | +37.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 96 | +2.0 | +38.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 97 | +2.0 | +38.0 | +34.0 | +31.00 | +35.00 | +200 | +
| 98 | +7.0 | +40.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 99 | +5.0 | +38.0 | +34.0 | +32.00 | +35.00 | +200 | +
| 100 | +7.0 | +37.0 | +34.0 | +31.00 | +35.00 | +200 | +
| \n", + " | position | \n", + "min | \n", + "max | \n", + "median | \n", + "q1 | \n", + "q3 | \n", + "sample_count | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "2.0 | \n", + "34.0 | \n", + "33.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 1 | \n", + "1 | \n", + "10.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 2 | \n", + "2 | \n", + "16.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 3 | \n", + "3 | \n", + "19.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| 4 | \n", + "4 | \n", + "16.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 96 | \n", + "96 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 97 | \n", + "97 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 98 | \n", + "98 | \n", + "7.0 | \n", + "40.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 99 | \n", + "99 | \n", + "5.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 100 | \n", + "100 | \n", + "7.0 | \n", + "37.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
101 rows × 7 columns
\n", + "" + ], + "text/plain": [ + " position min max median q1 q3 sample_count\n", + "0 0 2.0 34.0 33.0 31.0 34.0 200\n", + "1 1 10.0 34.0 34.0 31.0 34.0 200\n", + "2 2 16.0 34.0 34.0 31.0 34.0 200\n", + "3 3 19.0 37.0 37.0 35.0 37.0 200\n", + "4 4 16.0 37.0 37.0 35.0 37.0 200\n", + ".. ... ... ... ... ... ... ...\n", + "96 96 2.0 38.0 34.0 32.0 35.0 200\n", + "97 97 2.0 38.0 34.0 31.0 35.0 200\n", + "98 98 7.0 40.0 34.0 32.0 35.0 200\n", + "99 99 5.0 38.0 34.0 32.0 35.0 200\n", + "100 100 7.0 37.0 34.0 31.0 35.0 200\n", + "\n", + "[101 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pb.cacl_base_seq_quality(path, output_type='pandas.DataFrame')" + ] + }, + { + "cell_type": "markdown", + "id": "74720ec0", + "metadata": {}, + "source": [ + "Mozna równiez zmieniać liczbę wątków" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "218aad32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | position | \n", + "min | \n", + "max | \n", + "median | \n", + "q1 | \n", + "q3 | \n", + "sample_count | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "2.0 | \n", + "34.0 | \n", + "33.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 1 | \n", + "1 | \n", + "10.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 2 | \n", + "2 | \n", + "16.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 3 | \n", + "3 | \n", + "19.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| 4 | \n", + "4 | \n", + "16.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 96 | \n", + "96 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 97 | \n", + "97 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 98 | \n", + "98 | \n", + "7.0 | \n", + "40.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 99 | \n", + "99 | \n", + "5.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 100 | \n", + "100 | \n", + "7.0 | \n", + "37.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
101 rows × 7 columns
\n", + "| \n", + " | position | \n", + "min | \n", + "max | \n", + "median | \n", + "q1 | \n", + "q3 | \n", + "sample_count | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "2.0 | \n", + "34.0 | \n", + "33.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 1 | \n", + "1 | \n", + "10.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 2 | \n", + "2 | \n", + "16.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 3 | \n", + "3 | \n", + "19.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| 4 | \n", + "4 | \n", + "16.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 96 | \n", + "96 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 97 | \n", + "97 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 98 | \n", + "98 | \n", + "7.0 | \n", + "40.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 99 | \n", + "99 | \n", + "5.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 100 | \n", + "100 | \n", + "7.0 | \n", + "37.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
101 rows × 7 columns
\n", + "" + ], + "text/plain": [ + " position min max median q1 q3 sample_count\n", + "0 0 2.0 34.0 33.0 31.0 34.0 200\n", + "1 1 10.0 34.0 34.0 31.0 34.0 200\n", + "2 2 16.0 34.0 34.0 31.0 34.0 200\n", + "3 3 19.0 37.0 37.0 35.0 37.0 200\n", + "4 4 16.0 37.0 37.0 35.0 37.0 200\n", + ".. ... ... ... ... ... ... ...\n", + "96 96 2.0 38.0 34.0 32.0 35.0 200\n", + "97 97 2.0 38.0 34.0 31.0 35.0 200\n", + "98 98 7.0 40.0 34.0 32.0 35.0 200\n", + "99 99 5.0 38.0 34.0 32.0 35.0 200\n", + "100 100 7.0 37.0 34.0 31.0 35.0 200\n", + "\n", + "[101 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pb.cacl_base_seq_quality(path, output_type='pandas.DataFrame')" + ] + }, + { + "cell_type": "markdown", + "id": "74720ec0", + "metadata": {}, + "source": [ + "Mozna równiez zmieniać liczbę wątków" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "218aad32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | position | \n", + "min | \n", + "max | \n", + "median | \n", + "q1 | \n", + "q3 | \n", + "sample_count | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "2.0 | \n", + "34.0 | \n", + "33.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 1 | \n", + "1 | \n", + "10.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 2 | \n", + "2 | \n", + "16.0 | \n", + "34.0 | \n", + "34.0 | \n", + "31.0 | \n", + "34.0 | \n", + "200 | \n", + "
| 3 | \n", + "3 | \n", + "19.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| 4 | \n", + "4 | \n", + "16.0 | \n", + "37.0 | \n", + "37.0 | \n", + "35.0 | \n", + "37.0 | \n", + "200 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 96 | \n", + "96 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 97 | \n", + "97 | \n", + "2.0 | \n", + "38.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 98 | \n", + "98 | \n", + "7.0 | \n", + "40.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 99 | \n", + "99 | \n", + "5.0 | \n", + "38.0 | \n", + "34.0 | \n", + "32.0 | \n", + "35.0 | \n", + "200 | \n", + "
| 100 | \n", + "100 | \n", + "7.0 | \n", + "37.0 | \n", + "34.0 | \n", + "31.0 | \n", + "35.0 | \n", + "200 | \n", + "
101 rows × 7 columns
\n", + "