Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Project Rebirth - November23 #12

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 76 additions & 70 deletions Automation Scripts/firewall_rules.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,91 @@
import requests
import csv
import logging
import os
import requests
import subprocess
import io
import time
from typing import List, Optional

# Constants and Configurations
URL = "https://feodotracker.abuse.ch/downloads/ipblocklist.csv"
DELETE_RULE_TEMPLATE = "netsh advfirewall firewall delete rule name='BadIP_{direction}_{ip}'"
BLOCK_RULE_TEMPLATE = "netsh advfirewall firewall add rule name='BadIP_{direction}_{ip}' dir={direction} action=block remoteip={ip}"
# Retry policy is overridable via environment for easier ops tuning.
MAX_RETRIES = int(os.getenv("MAX_RETRIES", 3))
RETRY_DELAY = int(os.getenv("RETRY_DELAY", 2))  # seconds between retries

# Logging Setup
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


def execute_command(command: str) -> None:
    """Run *command* through PowerShell, logging and re-raising any failure."""
    shell_args = ["Powershell", "-Command", command]
    try:
        subprocess.run(shell_args, check=True)
    except subprocess.CalledProcessError as err:
        logger.error(f"Command failed: {err}")
        raise


def fetch_blocklist(url: str) -> Optional[str]:
    """Fetch the blocklist CSV from *url*, retrying on network errors.

    :param url: Blocklist download URL.
    :return: Response body as text, or None if every attempt failed.
    """
    for attempt in range(MAX_RETRIES):
        try:
            # timeout prevents a hung connection from stalling the script forever
            return requests.get(url, timeout=30).text
        except requests.exceptions.RequestException as e:
            # catch only network/HTTP errors; programming errors should surface
            logger.error(f"Error fetching: {e}")
            if attempt < MAX_RETRIES - 1:  # no point sleeping after the last try
                time.sleep(RETRY_DELAY)
    return None


def parse_csv(data: str) -> List[List[str]]:
    """Parse CSV data into rows, skipping blank lines and '#' comment lines.

    The Feodo blocklist prefixes metadata with '#'; keeping those lines
    would yield short rows that break downstream column access (row[1]).
    """
    meaningful = (
        line for line in data.splitlines() if line and not line.startswith("#")
    )
    return [row for row in csv.reader(meaningful) if row]


def update_firewall_rule(ip: str, direction: str) -> None:
    """Refresh the block rule for *ip* in *direction*: delete any stale rule, then re-add."""
    commands = [
        DELETE_RULE_TEMPLATE.format(direction=direction, ip=ip),
        BLOCK_RULE_TEMPLATE.format(direction=direction, ip=ip),
    ]
    try:
        for cmd in commands:
            execute_command(cmd)
        logger.info(f"Updated {direction} rule for IP: {ip}")
    except Exception as exc:
        logger.error(f"Update failed for {ip}: {exc}")
        raise


def rule_updater():
    """Orchestrate the update: download the blocklist and refresh firewall rules.

    Blocks each listed IP in both inbound and outbound directions; a failure
    on one IP is logged and does not stop processing of the rest.
    """
    data = fetch_blocklist(URL)
    if not data:
        logger.error("Failed to download blocklist")
        return

    rules = parse_csv(data)
    for rule in rules:
        # Column 1 of the Feodo CSV is the destination IP; guard against
        # malformed short rows before indexing.
        if len(rule) < 2:
            continue
        ip = rule[1]
        if ip == "dst_ip":  # skip the header row
            continue

        try:
            update_firewall_rule(ip, "In")
            update_firewall_rule(ip, "Out")
        except Exception as e:
            logger.error(f"Error updating rule for IP {ip}: {e}")


def main():
    """Entry point for module execution"""
    try:
        rule_updater()
    except Exception as e:
        # top-level boundary: log rather than crash with a traceback
        logger.error(f"Unhandled error: {e}")


if __name__ == "__main__":
    main()
90 changes: 51 additions & 39 deletions Automation Scripts/twitter-streaming/robust-streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@
logger = logging.getLogger(__name__)

# Initialize OpenAI API
openai.api_key = os.getenv("OPENAI_API_KEY")
# NOTE(review): the published ada embedding model id is "text-embedding-ada-002"
# (singular "embedding"); the previous value "text-embeddings-ada-002" is not a
# valid model id and would fail at request time.
model_engine = "text-embedding-ada-002"

# Initialize Pinecone
pinecone.init(api_key=os.getenv("PINECONE_API_KEY", "default_api_key"))

# Define Pinecone index name
pinedex = os.getenv("PINECONE_INDEX", "threat-embeddings")
index = pinecone.Index(index_name=pinedex)


def create_headers(bearer_token: str) -> Dict[str, str]:
"""
Create headers for Twitter API requests.

:param bearer_token: Bearer token for Twitter API authentication.
:return: Headers with the Authorization field set.
"""
Expand All @@ -48,34 +48,40 @@ def create_headers(bearer_token: str) -> Dict[str, str]:
def get_existing_rules(headers: Dict[str, str]) -> Dict[str, Any]:
    """
    Retrieve existing rules for the Twitter stream.

    :param headers: Headers for the Twitter API request.
    :return: JSON response containing existing rules.
    :raises Exception: If the request is not successful.
    """
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
    )
    if response.status_code != 200:
        raise Exception(
            f"Cannot get rules (HTTP {response.status_code}): {response.text}"
        )
    return response.json()


def rules_are_equal(existing_rules: Dict[str, Any], new_rules: Dict[str, Any]) -> bool:
    """
    Check if the existing rules are equal to the new rules.

    Comparison is order-insensitive: rules are reduced to (value, tag) pairs.

    :param existing_rules: Existing rules from the Twitter API (JSON with a "data" list).
    :param new_rules: New rules to be set (list of {"value": ..., "tag": ...}).
    :return: True if the rules are equal, False otherwise.
    """
    existing_rules_set = {
        (rule["value"], rule["tag"]) for rule in existing_rules.get("data", [])
    }
    new_rules_set = {(rule["value"], rule["tag"]) for rule in new_rules}
    return existing_rules_set == new_rules_set


def set_rules(headers: Dict[str, str], rules: Dict[str, Any]) -> None:
"""
Set new rules for the Twitter stream.

:param headers: Headers for the Twitter API request.
:param rules: New rules to be set.
:raises Exception: If the request is not successful.
Expand All @@ -94,11 +100,10 @@ def set_rules(headers: Dict[str, str], rules: Dict[str, Any]) -> None:

def delete_all_rules(headers: Dict[str, str]) -> None:
"""
Delete all existing rules for the Twitter stream.
Delete all existing rules for the Twitter stream.

:param headers: Headers for the Twitter API request.
:raises Exception: If the request is not successful.
"""
:param headers: Headers for the Twitter API request.
:raises Exception: If the request is not successful."""
response = requests.get(
"https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
)
Expand All @@ -107,14 +112,14 @@ def delete_all_rules(headers: Dict[str, str]) -> None:
f"Cannot get rules (HTTP {response.status_code}): {response.text}"
)
rules = response.json()
ids = [rule['id'] for rule in rules.get('data', {})]
ids = [rule["id"] for rule in rules.get("data", {})]
if not ids:
return
payload = {"delete": {"ids": ids}}
response = requests.post(
"https://api.twitter.com/2/tweets/search/stream/rules",
headers=headers,
json=payload
json=payload,
)
if response.status_code != 200:
raise Exception(
Expand All @@ -131,35 +136,38 @@ async def process_tweet(headers: Dict[str, str], tweet: Dict[str, Any]) -> None:
"""
try:
# Redact PII from tweet text
tweet_text = scrubadub.clean(tweet['data']['text'])
tweet_text = scrubadub.clean(tweet["data"]["text"])

# Generate embedding for the tweet text
response = openai.Embedding.create(
model=model_engine,
texts=[tweet_text]
model=model_engine, texts=[tweet_text]
) # type: ignore

# Type hint and check
response: Dict[str, Any]
if isinstance(response, dict) and 'embeddings' in response:
tweet_embedding = response['embeddings'][0]['embedding']
if isinstance(response, dict) and "embeddings" in response:
tweet_embedding = response["embeddings"][0]["embedding"]
else:
logger.error(f"Unexpected response format: {response}")
return

# Upsert the tweet ID, vector embedding, and original text to Pinecone index
index.upsert(vectors=[(tweet['data']['id'], tweet_embedding, {'text': tweet_text})])
index.upsert(
vectors=[(tweet["data"]["id"], tweet_embedding, {"text": tweet_text})]
)

# Query Pinecone index for similar tweets
results = index.query(queries=[tweet_embedding], top_k=5, include_metadata=True)

# Check if results are in the expected format
if 'scores' in results and 'ids' in results:
if "scores" in results and "ids" in results:
# Log potential threats
for idx, score in enumerate(results['scores']):
for idx, score in enumerate(results["scores"]):
if score > 0.959:
matched_id = results['ids'][idx]
logger.info(f"Potential threat detected in tweet {tweet['data']['id']} with similarity score {score} to tweet {matched_id}")
matched_id = results["ids"][idx]
logger.info(
f"Potential threat detected in tweet {tweet['data']['id']} with similarity score {score} to tweet {matched_id}"
)

except Exception as e:
logger.error(f"Error processing tweet {tweet['data']['id']}: {e}")
Expand All @@ -172,9 +180,13 @@ def stream_to_file_and_stdout(headers: Dict[str, str]) -> None:
:param headers: Headers for the Twitter API request.
:raises Exception: If the request is not successful.
"""
response = requests.get("https://api.twitter.com/2/tweets/search/stream", headers=headers, stream=True)
response = requests.get(
"https://api.twitter.com/2/tweets/search/stream", headers=headers, stream=True
)
if response.status_code != 200:
raise Exception(f"Cannot get stream (HTTP {response.status_code}): {response.text}")
raise Exception(
f"Cannot get stream (HTTP {response.status_code}): {response.text}"
)

timestamp = datetime.datetime.now().strftime("%m%d%H%M")
with open(f"twitter_stream_{timestamp}.txt", "w") as file:
Expand All @@ -186,27 +198,27 @@ def stream_to_file_and_stdout(headers: Dict[str, str]) -> None:
asyncio.run(process_tweet(headers, json_response))


def main() ->None:
def main() -> None:
"""
Main function to set up and start the Twitter stream.
"""
rules = [
{
"value": "(LGBTQIA+ OR transgender OR gay OR lesbian OR bisexual OR queer OR intersex OR asexual OR genderfluid OR nonbinary) -has:links lang:en -is:retweet (context:entities:(sentiment: negative OR sentiment: very_negative))",
"tag": "LGBTQIA+"
"value": "(LGBTQIA+ OR transgender OR gay OR lesbian OR bisexual OR queer OR intersex OR asexual OR genderfluid OR nonbinary) -has:links lang:en -is:retweet (context:entities:(sentiment: negative OR sentiment: very_negative))",
"tag": "LGBTQIA+",
},
{
"value": "('Donald Trump' OR 'Matt Walsh' OR 'dont tread on me' OR 'MAGA' OR 'Second Amendment' OR 'QAnon' OR 'Proud Boys' OR 'Oath Keepers') -has:links lang:en -is:retweet (context:entities:(sentiment: negative OR sentiment: very_negative))",
"tag": "Right-Wing Extremism"
"value": "('Donald Trump' OR 'Matt Walsh' OR 'dont tread on me' OR 'MAGA' OR 'Second Amendment' OR 'QAnon' OR 'Proud Boys' OR 'Oath Keepers') -has:links lang:en -is:retweet (context:entities:(sentiment: negative OR sentiment: very_negative))",
"tag": "Right-Wing Extremism",
},
{
"value": "('white power' OR 'white pride' OR 'white nationalism' OR 'white supremacy' OR 'Ku Klux Klan' OR 'neo-Nazi') -has:links lang:en -is:retweet (context:entities:(sentiment: positive OR sentiment: very_positive))",
"tag": "Religious Extremism"
}
"value": "('white power' OR 'white pride' OR 'white nationalism' OR 'white supremacy' OR 'Ku Klux Klan' OR 'neo-Nazi') -has:links lang:en -is:retweet (context:entities:(sentiment: positive OR sentiment: very_positive))",
"tag": "Religious Extremism",
},
]
bearer_token = os.getenv('TWITTER_BEARER_TOKEN')
bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
headers = create_headers(bearer_token)

# Retrieve existing rules
existing_rules = get_existing_rules(headers)

Expand Down
Loading