Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
node_modules
.venv
50 changes: 50 additions & 0 deletions implement-shell-tools/cat/cat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import argparse
import sys


def parse_args():
parser = argparse.ArgumentParser(
description="Reads file(s) and writes them to the standard output",
)
parser.add_argument("paths", nargs="+", help="The file path(s) to process")
parser.add_argument(
"-n",
action="store_true",
dest="number_all",
help="Number the output lines, starting at 1.",
)
parser.add_argument(
"-b",
action="store_true",
dest="number_nonblank",
help="Number only non-blank output lines, starting at 1.",
)
return parser.parse_args()


def main():
args = parse_args()

try:
for path in args.paths:
line_num = 1
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'line_num' is initialized inside the for loop for each file. This means the line numbering restarts for each file, which is fine if that's the intended behavior. If you wanted to number lines across all files, where would you move the declaration of 'line_num'?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like an overly "what if" question for the AI reviewer, and may confuse the trainee into thinking something is wrong?


with open(path, "r", encoding="utf-8") as file:
for raw_line in file:
line = raw_line.rstrip("\n")
is_blank = line.strip() == ""
should_number = args.number_all or (
args.number_nonblank and not is_blank)

if should_number:
print(f"{line_num} {line}")
line_num += 1
else:
print(line)
except OSError as err:
print(err, file=sys.stderr)

return 0


main()
86 changes: 86 additions & 0 deletions implement-shell-tools/ls/ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import argparse
import os
import stat
import sys


def parse_args():
parser = argparse.ArgumentParser(
description="List directory contents",
)
parser.add_argument(
"paths",
nargs="*",
help="The file path to process (defaults to current directory)",
)
parser.add_argument(
"-a",
action="store_true",
dest="include_hidden",
help="Include directory entries whose names begin with a dot ('.').",
)
parser.add_argument(
"-1",
action="store_true",
dest="one_per_line",
help="Force output to be one entry per line.",
)
return parser.parse_args()


def filter_hidden(files: list[str]) -> list[str]:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'files' is used in several functions, but sometimes it refers to file names and sometimes to directory entries. Could you think of a name that would make it clearer what kind of data is being stored in each case?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this comment is accurate? I only see files used used to refer to file names?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the ai assumes that when a user runs ls sample/file.txt, the main function passes ['sample/file.txt'] as files to the helper functions. From ai's perspective this value is a directory entry, rather than a file name.

return [name for name in files if not name.startswith(".")]


def get_visible_entries(files: list[str], include_hidden: bool):
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'files' is used in several functions, but sometimes it refers to file names and sometimes to directory entries. Could you think of a name that would make it clearer what kind of data is being stored in each case?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above

return files if include_hidden else filter_hidden(files)

Comment on lines +35 to +37
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for filtering visible entries (with or without hidden files) is used in several places. How might you refactor your code so that this logic is only written once and reused wherever needed?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks pretty extracted to me already - do you know what this is trying to point at?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably it doesn't like these two identical lines of code
filtered = get_visible_entries(contents, include_hidden)


def format_entries(files: list[str], one_per_line: bool):
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'files' is used in several functions, but sometimes it refers to file names and sometimes to directory entries. Could you think of a name that would make it clearer what kind of data is being stored in each case?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above (but also, ideally it would only have left this comment once?)

if len(files) == 0:
return
print(("\n" if one_per_line else "\t").join(files))


def main():
args = parse_args()

try:
file_paths = args.paths if args.paths else ["."]
include_hidden = bool(args.include_hidden)
one_per_line = bool(args.one_per_line)

result_files: list[str] = []
result_dirs: dict[str, list[str]] = {}

for file_path in file_paths:
st = os.stat(file_path)
# Is a file?
if stat.S_ISREG(st.st_mode):
result_files.append(file_path)
# Is a directory?
if stat.S_ISDIR(st.st_mode):
result_dirs[file_path] = os.listdir(file_path)

result_files = get_visible_entries(result_files, include_hidden)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for filtering visible entries (with or without hidden files) is used in several places. How might you refactor your code so that this logic is only written once and reused wherever needed?


if len(file_paths) == 1:
entries = list(result_files)
for contents in result_dirs.values():
filtered = get_visible_entries(contents, include_hidden)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for filtering visible entries (with or without hidden files) is used in several places. How might you refactor your code so that this logic is only written once and reused wherever needed?

entries.extend(filtered)
format_entries(entries, one_per_line)
else:
format_entries(result_files, one_per_line)

for directory, contents in result_dirs.items():
print("\n" + directory + ":")
filtered = get_visible_entries(contents, include_hidden)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for filtering visible entries (with or without hidden files) is used in several places. How might you refactor your code so that this logic is only written once and reused wherever needed?

format_entries(filtered, one_per_line)
except OSError as err:
print(str(err), file=sys.stderr)

return 0


main()
1 change: 1 addition & 0 deletions implement-shell-tools/wc/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tabulate
92 changes: 92 additions & 0 deletions implement-shell-tools/wc/wc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import argparse
import os
import sys

from tabulate import tabulate


def parse_args():
parser = argparse.ArgumentParser(
description="word, line and byte count",
)
parser.add_argument("paths", nargs="+",
help="The file path(s) to process.")
parser.add_argument(
"-l",
"--lines",
action="store_true",
help="The number of lines in each input file is written to the standard output.",
)
parser.add_argument(
"-w",
"--words",
action="store_true",
help="The number of words in each input file is written to the standard output.",
)
parser.add_argument(
"-c",
"--bytes",
action="store_true",
dest="bytes",
help="The number of bytes in each input file is written to the standard output.",
)
return parser.parse_args()


def main():
args = parse_args()

try:
file_paths: list[str] = args.paths
results: dict[str, dict[str, int]] = {}

for file_path in file_paths:
stats = os.stat(file_path)
count = {"lines": 0, "words": 0, "bytes": stats.st_size}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable 'count' is used to store line, word, and byte counts for a file. Would a more descriptive name, like 'file_counts' or 'stats', make it clearer what this variable holds?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a reasonable, but slightly pedantic, comment


with open(file_path, "r", encoding="utf-8") as file:
for line in file:
count["lines"] += 1
trimmed = line.strip()
if len(trimmed) > 0:
count["words"] += len(trimmed.split())

results[file_path] = count

if len(file_paths) > 1:
total = {"lines": 0, "words": 0, "bytes": 0}
for file_count in results.values():
total["lines"] += file_count["lines"]
total["words"] += file_count["words"]
total["bytes"] += file_count["bytes"]
results["total"] = total

no_options_provided = not (args.lines or args.words or args.bytes)
selected_option_keys: list[str] = []

if args.lines:
selected_option_keys.append("lines")
if args.words:
selected_option_keys.append("words")
if args.bytes:
selected_option_keys.append("bytes")

output_columns = [
"lines", "words", "bytes"] if no_options_provided else selected_option_keys
rows: list[list[str | int]] = []
for name, values in results.items():
rows.append([name] + [values[column] for column in output_columns])

if no_options_provided:
print(tabulate(rows, headers=[
"index"] + output_columns))
else:
print(tabulate(rows, headers=[
"index"] + output_columns))
except OSError as err:
print(str(err), file=sys.stderr)

return 0


main()
Loading