Skip to content

Commit 6f5ffc6

Browse files
committed
WIP [ci skip]
1 parent 4952701 commit 6f5ffc6

5 files changed

Lines changed: 174 additions & 52 deletions

File tree

src/errors/cassie.py

Lines changed: 45 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,9 @@ def get_metadata_for_bucket(bucketid: str, release: str = None):
415415
try:
416416
if not release:
417417
# Get all columns up to "~" (non-inclusive)
418-
rows = BucketMetadata.objects.filter(key=bucketid, column1__lt="~").all()
418+
rows = BucketMetadata.objects.filter(key=bucketid.encode(), column1__lt="~").all()
419419
else:
420-
rows = BucketMetadata.objects.filter(key=bucketid).all()
420+
rows = BucketMetadata.objects.filter(key=bucketid.encode()).all()
421421

422422
ret = {}
423423
for row in rows:
@@ -426,6 +426,9 @@ def get_metadata_for_bucket(bucketid: str, release: str = None):
426426
if release and ret:
427427
try:
428428
ret["FirstSeen"] = ret["~%s:FirstSeen" % release]
429+
except KeyError:
430+
pass
431+
try:
429432
ret["LastSeen"] = ret["~%s:LastSeen" % release]
430433
except KeyError:
431434
pass
@@ -444,37 +447,7 @@ def chunks(l, n):
444447
def get_metadata_for_buckets(bucketids, release=None):
445448
ret = dict()
446449
for bucketid in bucketids:
447-
bucket_key = bucketid.encode() if isinstance(bucketid, str) else bucketid
448-
try:
449-
if not release:
450-
rows = BucketMetadata.objects.filter(key=bucket_key, column1__lt="~").all()
451-
else:
452-
rows = BucketMetadata.objects.filter(key=bucket_key).all()
453-
454-
bucket_data = {}
455-
for row in rows:
456-
bucket_data[row.column1] = row.value
457-
458-
if bucket_data:
459-
ret[bucketid] = bucket_data
460-
except DoesNotExist:
461-
pass
462-
463-
if release:
464-
for bucket_id in ret:
465-
bucket = ret[bucket_id]
466-
try:
467-
bucket["FirstSeen"] = bucket["~%s:FirstSeen" % release]
468-
bucket["LastSeen"] = bucket["~%s:LastSeen" % release]
469-
except KeyError:
470-
# Rather than confuse developers with half release-specific
471-
# data. Of course this will only apply for the current row, so
472-
# it's possible subsequent rows will show release-specific
473-
# data.
474-
if "FirstSeen" in bucket:
475-
del bucket["FirstSeen"]
476-
if "LastSeen" in bucket:
477-
del bucket["LastSeen"]
450+
ret[bucketid] = get_metadata_for_bucket(bucketid, release)
478451
return ret
479452

480453

@@ -645,65 +618,75 @@ def get_package_crash_rate(
645618

646619
# the generic counter only includes Crashes for packages from official
647620
# Ubuntu sources and from systems not under auto testing
648-
old_vers_column = "%s:%s:%s" % (release, src_package, old_version)
649-
new_vers_column = "%s:%s:%s" % (release, src_package, new_version)
621+
old_vers_column = "oopses:Crash:%s:%s:%s" % (release, src_package, old_version)
622+
new_vers_column = "oopses:Crash:%s:%s:%s" % (release, src_package, new_version)
650623
results = {}
651624

652625
try:
653-
# The first thing done is the reversing of the order that's why it
654-
# is column_start (get items <= date in reverse order)
655626
old_rows = (
656627
Counters.objects.filter(key=old_vers_column.encode(), column1__lte=date)
628+
.order_by("-column1")
657629
.limit(15)
658630
.all()
659631
)
660-
old_rows_sorted = sorted(old_rows, key=lambda x: x.column1, reverse=True)
661-
old_vers_data = {row.column1: row.value for row in old_rows_sorted}
632+
old_vers_data = {row.column1: row.value for row in old_rows}
662633
except DoesNotExist:
663634
old_vers_data = None
664635

665636
try:
666637
# this may be unnecessarily long since updates phase in ~3 days
667-
new_rows = Counters.objects.filter(key=new_vers_column.encode()).limit(15).all()
668-
new_rows_sorted = sorted(new_rows, key=lambda x: x.column1, reverse=True)
669-
new_vers_data = {row.column1: row.value for row in new_rows_sorted}
638+
new_rows = (
639+
Counters.objects.filter(key=new_vers_column.encode())
640+
.order_by("-column1")
641+
.limit(15)
642+
.all()
643+
)
644+
print(new_rows)
645+
new_vers_data = {row.column1: row.value for row in new_rows}
646+
print(new_vers_data)
670647
except DoesNotExist:
648+
print("New data does not exist")
671649
results["increase"] = False
672650
return results
673651

674652
if not new_vers_data:
653+
print("No new data")
675654
results["increase"] = False
676655
return results
677656

678657
if exclude_proposed:
679658
try:
680659
proposed_old_rows = (
681660
CountersForProposed.objects.filter(key=old_vers_column.encode(), column1__lte=date)
661+
.order_by("-column1")
682662
.limit(15)
683663
.all()
684664
)
685-
proposed_old_rows_sorted = sorted(
686-
proposed_old_rows, key=lambda x: x.column1, reverse=True
687-
)
688-
proposed_old_vers_data = {row.column1: row.value for row in proposed_old_rows_sorted}
665+
proposed_old_vers_data = {row.column1: row.value for row in proposed_old_rows}
689666
except DoesNotExist:
690667
proposed_old_vers_data = None
691668
try:
692669
proposed_new_rows = (
693-
CountersForProposed.objects.filter(key=new_vers_column.encode()).limit(15).all()
694-
)
695-
proposed_new_rows_sorted = sorted(
696-
proposed_new_rows, key=lambda x: x.column1, reverse=True
670+
CountersForProposed.objects.filter(key=new_vers_column.encode())
671+
.order_by("-column1")
672+
.limit(15)
673+
.all()
697674
)
698-
proposed_new_vers_data = {row.column1: row.value for row in proposed_new_rows_sorted}
675+
proposed_new_vers_data = {row.column1: row.value for row in proposed_new_rows}
699676
except DoesNotExist:
700677
proposed_new_vers_data = None
701678

679+
print(f"{proposed_old_vers_data=}")
680+
print(f"{proposed_new_vers_data=}")
681+
print(f"{old_vers_data=}")
682+
print(f"{new_vers_data=}")
702683
today = datetime.datetime.utcnow().strftime("%Y%m%d")
684+
print(today)
703685
try:
704686
today_crashes = new_vers_data[today]
705687
except KeyError:
706688
# no crashes today so not an increase
689+
print("No data for today")
707690
results["increase"] = False
708691
return results
709692

@@ -716,6 +699,7 @@ def get_package_crash_rate(
716699
today_crashes = today_crashes - today_proposed_crashes
717700
if today_crashes == 0:
718701
# no crashes today so not an increase
702+
print("No data for today outside -proposed")
719703
results["increase"] = False
720704
return results
721705

@@ -733,8 +717,11 @@ def get_package_crash_rate(
733717
return results
734718

735719
first_date = date
720+
print(f"{first_date=}")
736721
oldest_date = list(old_vers_data.keys())[-1]
722+
print(f"{oldest_date=}")
737723
dates = [x for x in _date_range_iterator(oldest_date, first_date)]
724+
print(f"{dates=}")
738725
previous_vers_crashes = []
739726
previous_days = len(dates[:-1])
740727
for day in dates[:-1]:
@@ -756,12 +743,15 @@ def get_package_crash_rate(
756743
results["increase"] = False
757744
# 2 crashes may be a fluke
758745
if today_crashes < 3:
746+
print("Less than 3 crashes today")
759747
return results
760748

761749
now = datetime.datetime.utcnow()
762750
hour = float(now.hour)
763751
minute = float(now.minute)
764752
mean_crashes = numpy.average(previous_vers_crashes)
753+
print(f"{mean_crashes=}")
754+
print(f"{previous_vers_crashes=}")
765755
standard_crashes = (mean_crashes + numpy.std(previous_vers_crashes)).round()
766756
# if an update isn't fully phased then the previous package version will
767757
# generally have more crashes than the phasing one so multiply the quantity
@@ -786,6 +776,10 @@ def get_package_crash_rate(
786776
results["web_link"] = absolute_uri + web_link
787777
results["previous_period_in_days"] = previous_days
788778
results["previous_average"] = standard_crashes
779+
print("Difference less than 1")
780+
print(f"{difference=}")
781+
print(f"{today_crashes=}")
782+
print(f"{standard_crashes=}")
789783
return results
790784

791785

src/errortracker/cassandra_schema.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,18 @@ class SystemOOPSHashes(ErrorTrackerTable):
114114

115115
class BucketMetadata(ErrorTrackerTable):
116116
__table_name__ = "BucketMetadata"
117+
# the bucket ID
118+
# - /bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread
117119
key = columns.Blob(db_field="key", primary_key=True)
120+
# Which metadata
121+
# - FirstSeen (package version)
122+
# - LastSeen (package version)
123+
# - FirstSeenRelease (Ubuntu series)
124+
# - ~Ubuntu 25.04:LastSeen (package version)
118125
column1 = columns.Text(db_field="column1", primary_key=True)
126+
# The corresponding value for the metadata
127+
# - 5.9-6ubuntu2 (package version)
128+
# - Ubuntu 18.04 (Ubuntu series)
119129
value = columns.Text(db_field="value")
120130

121131
@classmethod

src/errortracker/oopses.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ def _insert(
100100
:param ttl: boolean for setting the time to live for the column
101101
:return: The day which the oops was filed under.
102102
"""
103-
day_key = time.strftime("%Y%m%d", time.gmtime())
103+
try:
104+
# Try to get the actual day of that crash, otherwise fallback to today
105+
day_key = time.strftime("%Y%m%d", time.strptime(insert_dict["Date"], "%c"))
106+
except Exception:
107+
day_key = time.strftime("%Y%m%d", time.gmtime())
104108
now_uuid = uuid.uuid1()
105109

106110
if ttl:

src/tests/conftest.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66

77
"""Test helpers for working with cassandra."""
88

9+
import locale
910
import shutil
1011
import tempfile
12+
from datetime import datetime, timedelta
1113
from pathlib import Path
1214
from unittest.mock import patch
1315

@@ -45,3 +47,63 @@ def retracer(temporary_db):
4547
architecture=architecture,
4648
)
4749
shutil.rmtree(temp)
50+
51+
52+
@pytest.fixture(scope="module")
53+
def datetime_now():
54+
return datetime.now()
55+
56+
57+
@pytest.fixture(scope="function")
58+
def cassandra_data(datetime_now, temporary_db):
59+
import bson
60+
import logging
61+
62+
from daisy.submit import submit
63+
64+
# disable daisy logger temporarily
65+
daisy_logger = logging.getLogger("daisy")
66+
daisy_logger_level = daisy_logger.level
67+
daisy_logger.setLevel(51) # CRITICAL is 50, so let's go higher
68+
69+
# Make sure the datetime will get formatted "correctly" in that cursed time format: Mon May 5 14:46:10 2025
70+
locale.setlocale(locale.LC_ALL, "C.UTF-8")
71+
72+
def count():
73+
counter = 0
74+
while True:
75+
yield str(counter)
76+
counter += 1
77+
78+
def new_oops(days_ago, data, systemid="imatestsystem"):
79+
crash_date = datetime_now - timedelta(days=days_ago)
80+
oops_date = crash_date.strftime("%c")
81+
data.update({"Date": oops_date})
82+
bson_data = bson.encode(data)
83+
request = type(
84+
"Request",
85+
(object,),
86+
dict(data=bson_data, headers={"X-Whoopsie-Version": "0.2.81ubuntu~fakefortesting"}),
87+
)
88+
submit(request, systemid)
89+
90+
# Get a wide screen, because here we'll want to have compact data, meaning long lines 🙃
91+
# fmt: off
92+
93+
# increase-rate package version 1
94+
for i in [30, 20, 10, 5, 2]:
95+
new_oops(i, {"DistroRelease": "Ubuntu 24.04", "Package": "increase-rate 1", "ProblemType": "Crash", "Architecture": "amd64", "ExecutablePath": "/usr/bin/increase-rate", "StacktraceAddressSignature": "/usr/bin/increase-rate:42:/usr/bin/increase-rate+28"})
96+
97+
# increase-rate package version 2
98+
for i in [2, 2, 1, 1, 1, 0, 0, 0, 0]:
99+
new_oops(i, {"DistroRelease": "Ubuntu 24.04", "Package": "increase-rate 2", "ProblemType": "Crash", "Architecture": "amd64", "ExecutablePath": "/usr/bin/increase-rate", "StacktraceAddressSignature": "/usr/bin/increase-rate:42:/usr/bin/increase-rate+fa0"})
100+
101+
# increase-rate package version 2 in proposed, even more crashes!
102+
for i in [1, 0]:
103+
new_oops(i, {"DistroRelease": "Ubuntu 24.04", "Package": "increase-rate 2", "ProblemType": "Crash", "Architecture": "amd64", "ExecutablePath": "/usr/bin/increase-rate", "StacktraceAddressSignature": "/usr/bin/increase-rate:42:/usr/bin/increase-rate+fa0", "Tags": "package-from-proposed"})
104+
# fmt: on
105+
106+
# re-enable daisy logger
107+
daisy_logger.setLevel(daisy_logger_level)
108+
109+
yield

src/tests/test_cassie.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from datetime import timedelta
2+
3+
import numpy
4+
from pytest import approx
5+
6+
from errors import cassie
7+
8+
9+
class TestPrune:
10+
def test_get_package_crash_rate_increase_rate(self, datetime_now, cassandra_data):
11+
now = datetime_now
12+
13+
crash_rate = cassie.get_package_crash_rate(
14+
"Ubuntu 24.04",
15+
"increase-rate",
16+
"1",
17+
"2",
18+
"70",
19+
(now - timedelta(days=0)).strftime("%Y%m%d"),
20+
"https://errors.internal/",
21+
)
22+
assert crash_rate == approx(
23+
{
24+
"increase": True,
25+
"difference": numpy.float64(4.3),
26+
"web_link": "https://errors.internal/?release=Ubuntu%2024.04&package=increase-rate&version=2",
27+
"previous_period_in_days": 30,
28+
"previous_average": numpy.float64(0.7),
29+
},
30+
rel=1e-1, # We don't want much precision, Cassandra is already messing up the values
31+
)
32+
33+
crash_rate = cassie.get_package_crash_rate(
34+
"Ubuntu 24.04",
35+
"increase-rate",
36+
"1",
37+
"2",
38+
"70",
39+
(now - timedelta(days=0)).strftime("%Y%m%d"),
40+
"https://errors.internal/",
41+
True,
42+
)
43+
assert crash_rate == approx(
44+
{
45+
"increase": True,
46+
"difference": numpy.float64(3.4),
47+
"web_link": "https://errors.internal/?release=Ubuntu%2024.04&package=increase-rate&version=2",
48+
"previous_period_in_days": 30,
49+
"previous_average": numpy.float64(0.7),
50+
},
51+
rel=1e-1, # We don't want much precision, Cassandra is already messing up the values
52+
)

0 commit comments

Comments
 (0)