diff --git a/TIPS.md b/TIPS.md new file mode 100644 index 0000000..c04c242 --- /dev/null +++ b/TIPS.md @@ -0,0 +1,6 @@ +# Avoid ++ [ ] Putting the parameter at the start of the prompt + +| Good | Bad | +|-------------------------------------------------------------|--------------------------------------------------| +| `Predict the capital of the country:\nCountry: {country}\n` | `{country}. Predict the capital of the country:` | diff --git a/cps.py b/cps.py index 6b2eeec..6a12921 100644 --- a/cps.py +++ b/cps.py @@ -2,8 +2,8 @@ import json import sys import os import subprocess -from run import runPrompt - +def runPrompt(malicious_input): + pass # read the file malicous.csv # each item is like this: [malicious prompt, expected malicous response] import csv @@ -67,8 +67,10 @@ def run(method=runPrompt): # check - if compare(expected_malicious_response, malicious_response): + passed =compare(expected_malicious_response, malicious_response) + if passed: malicious_inputs_passed += 1 + yield (malicious_input, malicious_response, passed) i+=1 @@ -82,7 +84,6 @@ def run(method=runPrompt): print("Percentage of malicious inputs that passed: " + str(percentage_malicious_inputs_passed)) # return the percentage of malicious inputs that passed - return percentage_malicious_inputs_passed if __name__ == '__main__': run() diff --git a/injections.md b/injections.md new file mode 100644 index 0000000..73479cb --- /dev/null +++ b/injections.md @@ -0,0 +1,12 @@ +# Injection Methods + +This is a list of ways in which an LLM might be injected with a malicious prompt or payload. + ++ Social Media + + Twitter + + [Smart Feed](https://github.com/SmartLever/SmartFeeds) ++ Email [llm-security](https://github.com/greshake/llm-security) ++ Websites [llm-security](https://github.com/greshake/llm-security) + + Bing + + [splendidbing](https://github.com/velocitatem/llm-cross-prompt-scripting/blob/main/splendidbing.md) + + [Indirect Prompt Injection on Bing Chat](https://greshake.github.io/)