Rusheb local build #1

Closed · wants to merge 3 commits
50 changes: 50 additions & 0 deletions README.md
@@ -0,0 +1,50 @@
# tabGPT

_Use GPT to classify a bunch of your open tabs!_

## Usage

### Initial Setup

1. Install [Poetry](https://python-poetry.org/docs/)
1. Install the project dependencies in a new virtualenv:

```poetry install```

### bookmark_utils

Parses a bookmarks HTML file into a `BookmarkFolder` class and serializes it to JSON.

1. Export bookmarks to an HTML file. Instructions:
* [Firefox](https://support.mozilla.org/en-US/kb/export-firefox-bookmarks-to-backup-or-transfer)
1. Run bookmark_utils

```poetry run python bookmark_utils.py path/to/bookmarks.html```
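The parse-and-serialize step above can be sketched with the standard library alone. This is a hypothetical illustration of what `bookmark_utils.py` might do, not the project's actual code: the `Bookmark`/`BookmarkFolder` shapes and the flat single-folder parsing are assumptions.

```python
# Minimal sketch: parse <A HREF=...> entries from a Netscape-style
# bookmarks export into a BookmarkFolder and dump it as JSON.
# (Illustrative only; the real bookmark_utils.py may differ.)
import json
from dataclasses import dataclass, field
from html.parser import HTMLParser


@dataclass
class Bookmark:
    title: str
    url: str


@dataclass
class BookmarkFolder:
    name: str
    bookmarks: list = field(default_factory=list)


class BookmarkExportParser(HTMLParser):
    """Collects every <A HREF=...> link from a bookmarks export."""

    def __init__(self):
        super().__init__()
        self.folder = BookmarkFolder(name="root")
        self._href = None  # href of the <a> tag currently open

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self._href = dict(attrs).get("href")

    def handle_data(self, data):
        if self._href:  # text inside an open <a> tag is the title
            self.folder.bookmarks.append(Bookmark(data.strip(), self._href))
            self._href = None


sample = '<DL><DT><A HREF="https://example.com">Example</A></DL>'
parser = BookmarkExportParser()
parser.feed(sample)
print(json.dumps([b.__dict__ for b in parser.folder.bookmarks]))
```

A real export nests folders in `<DL>` lists, so the actual parser would track folder depth rather than flattening everything into one folder as this sketch does.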

### preprocess_urls.py

Processes a file of URLs and prints their metadata to stdout. The input file should contain one URL per line.

1. Export tabs to a file
* Firefox: use [Export Tab URLs](https://addons.mozilla.org/en-GB/firefox/addon/export-tabs-urls-and-titles/) extension
1. Run preprocess_urls

```poetry run python preprocess_urls.py path/to/tabs_file.txt --output_format=[json|yaml|yml]```
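The preprocessing step above can be sketched as follows. This is an assumption about what `preprocess_urls.py` collects: the `url`/`title` field names, the `extract_title` helper, and the pre-fetched HTML input are all illustrative stand-ins (a real run would download each page, e.g. with `urllib.request`).

```python
# Hypothetical sketch of preprocess_urls.py's JSON output mode:
# read one URL per line, fetch each page, emit title metadata.
import json
import re


def extract_title(html: str) -> str:
    """Pull the <title> text out of a page, if present."""
    match = re.search(r"<title>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    return match.group(1).strip() if match else ""


def preprocess(urls_with_html):
    # urls_with_html: iterable of (url, page_html) pairs; the real
    # script would fetch page_html from the network per URL.
    records = [{"url": u, "title": extract_title(h)} for u, h in urls_with_html]
    return json.dumps(records, indent=2)


print(preprocess([("https://example.com", "<title>Example Domain</title>")]))
```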

### classify_tabs

TODO: description

#### Default input

1. Run classify_tabs

```poetry run python classify_tabs.py gen```

#### Custom input

1. Write some sample text to a file `temp.txt` in the project directory

1. Run classify_tabs:

```poetry run python classify_tabs.py gen_file```
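The `gen`/`gen_file` split above can be sketched as a small command dispatch. Everything here is an assumption for illustration: the `argparse` structure, the `DEFAULT_PROMPT`, and the stubbed `run` function stand in for whatever `classify_tabs.py` actually does with the GPT model.

```python
# Hypothetical sketch of the gen / gen_file dispatch: "gen" uses a
# built-in default prompt, "gen_file" reads custom input from temp.txt.
import argparse
from pathlib import Path

DEFAULT_PROMPT = "Classify these tabs:"  # assumed default input


def run(prompt: str) -> str:
    # Stand-in for the real GPT call (MODEL.generate in classify_tabs.py).
    return f"generated from: {prompt!r}"


def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("command", choices=["gen", "gen_file"])
    args = parser.parse_args(argv)
    if args.command == "gen_file":
        prompt = Path("temp.txt").read_text()  # custom input file
    else:
        prompt = DEFAULT_PROMPT
    return run(prompt)


print(main(["gen"]))
```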
6 changes: 4 additions & 2 deletions classify_tabs.py
@@ -8,6 +8,8 @@
 
 # pylint: disable=missing-class-docstring,missing-function-docstring,dangerous-default-value
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 
 # load model as global variable
 # ==============================
@@ -31,7 +33,7 @@
 def generate_continuation(prompt : str, max_length : int = 5, stop_token : str|None = None) -> str:
     input_ids : torch.Tensor = TOKENIZER.encode(prompt, return_tensors="pt")
     generated_text_ids = MODEL.generate(
-        input_ids = input_ids.cuda(),
+        input_ids = input_ids.to(device),
         max_length = max_length+len(input_ids[0]),
         do_sample = False,
     )
@@ -46,7 +48,7 @@ def generate(prompt : str, max_length : int = 5, stop_token : str|None = None) -> str:
 def get_logits_and_tokens(text : str):
     input_ids : torch.Tensor = TOKENIZER.encode(text, return_tensors="pt")
     tokens : list[str] = [TOKENIZER.decode([input_id]) for input_id in input_ids[0]]
-    output = MODEL(input_ids.cuda())
+    output = MODEL(input_ids.to(device))
     return output.logits[0][:-1], tokens
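The `do_sample = False` call in this diff asks `MODEL.generate` for greedy decoding. A minimal, library-free sketch of that loop, with a toy `next_token` function standing in for the real model (an assumption purely for illustration):

```python
def greedy_generate(prompt_tokens, next_token, max_length=5, stop_token=None):
    """Greedy decoding: repeatedly append the model's most likely next
    token (do_sample=False) until max_length new tokens are produced
    or the stop token appears."""
    tokens = list(prompt_tokens)
    for _ in range(max_length):
        tok = next_token(tokens)
        if tok == stop_token:
            break
        tokens.append(tok)
    return tokens


# Toy "model": always continues the alphabet from the last token.
def next_letter(tokens):
    return chr(ord(tokens[-1]) + 1)


print(greedy_generate(["a", "b"], next_letter, max_length=3))
# → ['a', 'b', 'c', 'd', 'e']
```

The `max_length = max_length + len(input_ids[0])` arithmetic in the diff exists because Hugging Face's `max_length` counts prompt tokens too; the loop above counts only newly generated tokens instead.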