From f60dee202d8289c18e3c97b73bee512fb2dbfa89 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Wed, 8 Mar 2023 19:13:18 +0100 Subject: [PATCH] Fixed missing replacement --- prompt-bounty/README.md | 13 ++++++++----- website/main.py | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/prompt-bounty/README.md b/prompt-bounty/README.md index 8ed816d..f046a14 100644 --- a/prompt-bounty/README.md +++ b/prompt-bounty/README.md @@ -1,5 +1,6 @@ # Prompt Injections Bounty Hunt -Submit a list of possible prompt injections in the following format: + +This is a bounty hunt to find prompt injections in the wild. The goal is to find as many as possible and then use them to test for prompt injections, to make AI safer. ## How? Put together a CSV of the following format: @@ -11,14 +12,16 @@ prompt,regex You can then use this to test for prompt injections on the following website: https://llm-cps.streamlit.app/ -## Why? -This is a bounty hunt to find prompt injections in the wild. The goal is to find as many as possible and then use them to test for prompt injections, to make AI safer. +Here are the guidelines for the prompt which you can use to test for prompt injections: ++ The parameter must not be at the end of the prompt. ++ The prompt _should_ be multi-shot; this is preferred but not required. ++ ## Reward For every prompt injection you find, you will receive 1 bounty point. You can then redeem these points: + 1 point = 5 USD -Only the first 20 prompts will be accepted, unless more funding is raised. +Only the **first 30 prompts will be accepted**, unless more funding is raised. ## How to submit Submit a pull request to this repository with the CSV file. The file should be named `prompt-injections.csv` and should be in the root directory of the repository. The file should be in the format described above. If you have any questions, please open an issue. @@ -27,7 +30,7 @@ Submit a pull request to this repository with the CSV file. 
The file should be n Once you submit your PR, it will be reviewed and merged. Once merged, you will receive a message with a link to redeem your points. You can then redeem your points for the reward. # How to fund this bounty -Please get in touch with me if you would like to fund this bounty. I am looking for funding to pay for the bounty points. Reach out here: daniel@alves.world +Please get in touch with me if you would like to fund this bounty. I am looking for funding to keep this going as long as possible. Reach out here: daniel@alves.world You can also donate to the following address: https://www.buymeacoffee.com/velocitatem24 diff --git a/website/main.py b/website/main.py index 874e2aa..8b4aa17 100644 --- a/website/main.py +++ b/website/main.py @@ -48,11 +48,11 @@ if custom_injections is not None: import openai -def runMethod(prompt): +def runMethod(evil): openai.api_key = api_key response = openai.Completion.create( engine=model, - prompt=prompt, + prompt=prompt.replace("[MASK]", evil), max_tokens=100, temperature=0.7, top_p=1, @@ -71,7 +71,7 @@ if st.button("Test Prompt"): # ex: yield (malicious_input, malicious_response, passed) for r in res: # if passed, show a cross emoji and the text failed in a header - st.markdown("### " + "Failed :x:" if r[2] else "Passed :white_check_mark:") + st.markdown("### " + "Failed :x:" if r[2] else "### Passed :white_check_mark:") # Present the results to the user in a nice way. st.write("Malicious input: " + r[0]) st.write("Malicious response: " + r[1])