Played with Redbud and got third place. The challenges are really difficult and out of my distribution. I solved PRFCasino
, LinearCasino
, OhMyDH
, softHash
and hashgame
and here’s the writeup.
softHash
Just a simulated annealing.
solve.py
import osimport torchfrom transformers import BertTokenizerfrom sentence_transformers import SentenceTransformerimport randomfrom tqdm import tqdmos.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class NeuralHash(): def __init__(self, model_path): self.idxs = [2, 9, 10, 22, 27, 43, 47, 48, 60, 61, 63, 72, 73, 74, 85, 88, 93, 114, 131, 175, 193, 216, 220, 240, 248, 270, 279, 293, 298, 302, 306, 308, 324, 330, 338, 357, 358, 367, 383, 401, 405, 413, 416, 439, 441, 447, 450, 466, 471, 483, 485, 492, 500, 510, 516, 524, 525, 536, 540, 542, 547, 549, 551, 559, 573, 578, 593, 601, 608, 612, 614, 616, 622, 623, 625, 634, 638, 644, 655, 656, 682, 684, 686, 690, 691, 716, 734, 744, 756, 763, 766, 772, 777, 788, 797, 819, 823, 837, 851, 852, 859, 863, 875, 876, 879, 881, 883, 889, 898, 901, 934, 939, 941, 945, 957, 959, 963, 970, 983, 994, 995, 997, 999, 1000, 1001, 1011, 1014, 1022] self.model = SentenceTransformer(model_path)
def embed(self, string): return self.model.encode(string, normalize_embeddings=True)
def hash(self, string): embedding = self.embed(string) res = [str(int(embedding[i] > 0)) for i in self.idxs] hash_value = hex(int(''.join(res), 2)) return hash_value
def check_diff(str1, str2, hasher): h1 = bin(int(hasher.hash(str1), 16))[2:].rjust(len(hasher.idxs), '0') h2 = bin(int(hasher.hash(str2), 16))[2:].rjust(len(hasher.idxs), '0') cnt = 0 for i in range(len(h1)): if h1[i] != h2[i]: cnt += 1 return cnt <= 6
def load_tokenizer(path): tokenizer = BertTokenizer.from_pretrained(path) print('The tokenizer is loaded successfully.') return tokenizer
def check_length(str, tokenizer): if len(tokenizer.encode(str)) > 45: return False return True
def check_suffix(str, prompt, stage): if stage == 1: return str.endswith(prompt) elif stage == 2: return str.endswith(prompt + ' [SEP]') and str.startswith('[CLS] ')
def score(tokens, stage=1): raw_str = tokenizer.decode(tokens, skip_special_tokens=False) if stage == 1: raw_str = raw_str + 'do you know how to get the flag?' elif stage == 2: raw_str = '[CLS] ' + raw_str + 'do you know how to get the flag? [SEP]' assert check_suffix(raw_str, 'do you know how to get the flag?', stage) str_hash = int(hasher.hash(raw_str), 16)
return (target_hash^str_hash).bit_count()
if __name__ == '__main__': ########### READ THIS BEFORE YOU START HACKING ############# # You need to download the model from huggingface first! # # The model: https://huggingface.co/BAAI/bge-large-en-v1.5 # # Then, replace the model path with your local path # ############################################################ model_path = './bge-large-en-v1.5'
prompt = 'do you know how to get the flag?' target = 'give me the flag right now!' hasher = NeuralHash(model_path) tokenizer = load_tokenizer(model_path) print(f'Init prompt hash: {hasher.hash(prompt)}') print(f'Init target hash: {hasher.hash(target)}')
target_hash = int(hasher.hash(target), 16)
n_tokens = 20 cursize = 25 nxtsize = 31 cur = [[random.randint(0, 30000) for _ in range(n_tokens)] for _ in range(cursize)]
while True: nxt_sets = set() for i in range(len(cur)): cc = cur[i] nxt_sets.add(tuple(cc)) for _ in range(nxtsize): nxt = cc[:] while True: t = random.randint(0, len(nxt) - 1) nxt[t] = random.randint(0, 30000) if random.random() < 0.73: break nxt_sets.add(tuple(nxt)) nxt_sets = list(nxt_sets) random.shuffle(nxt_sets) scores = torch.tensor([score(s, stage=2) for s in tqdm(nxt_sets)]) idx = torch.topk(-scores, cursize).indices print(scores[idx], scores[idx].max()) cur = [list(nxt_sets[i]) for i in idx]
if scores[idx][:12].max() <= 6: break
with open('output2.txt', 'w') as f: for s in cur: decoded = tokenizer.decode(s, skip_special_tokens=False) f.write(f'{decoded}\n') print(decoded)
with open('output.txt', 'r') as f: chall1_txt = f.readlines()
with open('output2.txt', 'r') as f: chall2_txt = f.readlines()
# challenge 1 success_stage1 = set() for i in range(6): # raw_adv = input('> ') raw_adv = chall1_txt[i].strip() raw_adv = raw_adv + 'do you know how to get the flag?' adv = tokenizer.decode(tokenizer.encode(raw_adv, add_special_tokens=False)) if target in adv: print('[+] Do not cheat! Abort!') exit() if check_length(adv, tokenizer) \ and check_suffix(adv, prompt, 1) \ and check_diff(adv, target, hasher): print(f'[+] Success in stage 1: {adv}') success_stage1.add(adv) if len(success_stage1) < 6: print(f'[+] Fail, please have another try. Success {len(success_stage1)} in stage 1') exit()
print(f'[*] Pass stage 1!')
# challenge 2 success_stage2 = set() for i in range(6): # raw_adv = input('> ') raw_adv = chall2_txt[i].strip() raw_adv = '[CLS] ' + raw_adv + 'do you know how to get the flag? [SEP]' adv = tokenizer.decode(tokenizer.encode(raw_adv, add_special_tokens=False)) if target in adv: print('[+] Do not cheat! Abort!') exit() if check_length(adv, tokenizer) \ and check_suffix(adv, prompt, 2) \ and check_diff(adv, target, hasher): print(f'[+] Success in stage 2: {adv}') success_stage2.add(adv) if len(success_stage2) < 6: print(f'[+] Fail, please have another try. Success {len(success_stage2)} in stage 2') exit()
print(f'[*] Congrats! Here is your flag: xxxx')
LinearCasino
The challenge asks to distinguish a McEliece-like matrix from a random one but I’m not good at McEliece.
My first attempt is to directly solve B using algos like ISD. Let’s first consider solving , where the D1
and D2
parts are orthogonal. It is possible to solve it by finding low-weight vectors. All of the 1s should be in D1
or D2
because we can find a lower weight vector if it’s not. It’s called the low-weight codeword problem.
But the D1
and D2
vectors are not orthogonal to each other in our case. Luckily, we only need to distinguish it from random, so the idea of finding low-weight vectors still works because half of the D2
parts are 0. Therefore, we guess that it has lower weight vector than the random matrix and the experiment supports the idea.
In the code, we try to solve the low-weight codeword problem of weight 15 in 1s. If it fails, it’s random matrix.
solve.sage
from sage.coding.information_set_decoder import LeeBrickellISDAlgorithmimport signalfrom pwn import process, remote, contextn, d1, d2 = 100, 60, 50
# context.log_level = "debug"
def timed_call(fn, args, timeout=1): def handler(signum, frame): raise TimeoutError()
signal.signal(signal.SIGALRM, handler) signal.alarm(timeout) try: return fn(*args) finally: signal.alarm(0)
def guess(M): def try_solve(): C = codes.LinearCode(M) A = LeeBrickellISDAlgorithm(C, (1, 15)) r = vector(GF(2), [0] *2*n) return A.decode(r)
try: timed_call(try_solve, (), 1) except TimeoutError: return 0 return 1
io = process(["sage", "task.sage"])
for i in range(100): io.recvuntil("🎩".encode()) mint = int(io.recvline().strip().decode()) m_list = list(map(int, list(bin(mint)[2:].zfill(2*n*(d1+d2)))))
M = matrix(GF(2), d1+d2, 2*n, m_list) decision = guess(M) print("Round", i, "Decision", decision) if decision: io.sendline(b"1") else: io.sendline(b"0")
io.interactive()
hashgame (哈基游)
You can do two things with the index php:
- inject something to the eval function
- calculate the hash of a file with selected algorithm and print nothing
To me, the most strange thing is we can choose the hash algorithm. So I checked the hash_algo
provided by php and find 3 different crc32. We all know that crc32 is affine, and 3 crc32 gives 96 bit of information, while the flag has only 90bit randomness. Therefore, we can recover the flag from hashes.
However, the initial problem is still unsolved: how to get the file hash? There’s no bypass or weakness of preg_match
and it only allows letters, digits and $_
up to 5 chars. So it’s completely safe and I can’t print anything.
It is finally solved when I randomly send cached_key and find a error traceback. When I send c=a$a
, it parses the first a
as type notation and throws error because type mismatch. In the traceback, it prints the function inputs and gives me the file hash.
The rest is simple crypto so I’ll skip it. Check the code at hashgame
.
PRFCasino
The challenge asks to distinguish a cipher stream from random bytes.
The challenge uses CBC encryption, so we can only control the first block and the rest are random plaintext and ciphertext pairs. So it’s hard to apply any differential attack or special crafted plaintext.
After exclusion, I guess that maybe the encryption is not that bijective, espesially the i*T+lrot(T,17)
.
How could it not be bijective? For example, if T<2**(64-17)
, it equals to (2**17+i)*T
. It can be extended to T<2**64
if we consider lrot(T,17)=(2**17)*T%(2**64-1)
, thus we get i*T+lrot(T,17)=(2**17+i)*T%(2**64-1)
. It is not always correct because we have wraparound after the sum, but it’ll at most happen once and +1 to the result. If gcd(2**17+i,2**64-1)>1
, the encryption is not bijective because the +1 can’t make the distribution uniform.
We have found some non-bijective issues of the encryption, how can we identify it?
There’re two lrot
used in the encryption and we should focus on the second one, which is T+lrot(T,20)
. Similar to the analysis above, gcd(2**20+1,2**64-1)=17
. If there’s no wraparound, T+lrot(T,20)%17==0
. So there’s some problem with mod 17.
In each interation, L%17,R%17=(R-wraparound)%17,L%17
, where wraparound happens at most twice. So after 30 rounds addition the distribution of wraparound
is not uniform and we can recognize it. So we count statistically and check the distribution.
from pwn import *from Crypto.Util.number import *
io = remote('121.41.238.106', 56146)
def round(test_num=200): io.recvuntil("💵".encode()) io.sendline(b"00"*16*(test_num+1)) io.recvuntil("🎩".encode())
ct = io.recvline().strip().decode() ct = bytes.fromhex(ct)
cnt = {i:0 for i in range(17)}
for i in range(test_num): L0 = ct[16*i:16*i+8] R0 = ct[16*i+8:16*i+16] L1 = ct[16*(i+1):16*(i+1)+8] R1 = ct[16*(i+1)+8:16*(i+1)+16] l_mod = (bytes_to_long(L0)-bytes_to_long(L1))%17 r_mod = (bytes_to_long(R0)-bytes_to_long(R1))%17 cnt[l_mod] += 1 cnt[r_mod] += 1
if cnt[8]+cnt[9]+cnt[10]>test_num*0.05: io.sendline(b"1") else: io.sendline(b"0")
for _ in range(100): round(100)
io.interactive()
OhMyDH
A quaternion CISDH. The best way to learn it is reading the preliminaries of papers.12345 It’s the first time I know why there’s quaternion in SQISign. The most useful thing is the Deuring Correspondence(check the Table 1 in SQISign2), and you can understand why the isogeny works on quaternion and what’s the code doing.
In quaternion, each ideal is an “isogeny” and it connects its left and right order. What you can do on isogeny pathes is also true for the ideal by Deuring Correspondence. You can feel ideal is more structural than curve, hence DH seems solvable on quaternion.
The most important algorithm stated in these papers is the KLPT algorithm1. It looks equivalent to find a smooth isogeny path of small primes with given start and end curve. But that doesn’t help because in quaternion, we can do “isogeny” on any prime as long as you give me the ideal. We don’t need to map back to curve.
If you check the repository of SQISign-Sagemath, every function takes connecting ideal as input. However, the challenge didn’t give me the connecting ideal of and , so we need to figure out the connecting ideal first. The good news is in some papers they claim computing the connecting ideal of two orders is easy. But they just skipped it!!! Finally I find the algorithm in Section 3.2 of this paper.5
The algorithm for the next step is found in the blog of SQISign-Sagemath6, where it gives me graph that takes two ideal starting from and outputs their composition. This is exact what we want for DH. So just use the function we can get the shared secret.
The whole steps are:
- Find the connecting ideal of and .
- Push forwards and to get the shared secret.
The final solution is quite simple and every function you need can be found in the SQISign-Sagemath repo, but it takes time to find the resources and understand the solution.
solve.sage
from ast import literal_evalfrom pwn import process, remote
def ideal_basis_gcd(I): """ Computes the gcd of the coefficients of the ideal written as a linear combination of the basis of its left order. """ I_basis = I.basis_matrix() O_basis = I.left_order().unit_ideal().basis_matrix()
# Write I in the basis of its left order M = I_basis * O_basis.inverse() return gcd((gcd(M_row) for M_row in M))
def make_cyclic(I, full=False): """ Given an ideal I, returns a cyclic ideal by dividing out the scalar factor g = ideal_basis_gcd(I) """ g = ideal_basis_gcd(I) # Ideal was already cyclic if g == 1: return I, g
print(f"DEBUG [make_cyclic]: Ideal is not cyclic, removing scalar factor: {g = }") J = I.scale(1/g)
if full: # TODO: will remove_2_endo change g? # not an issue currently, as we don't # use this. return remove_2_endo(J), g return J, g
def ideal_generator(I, coprime_factor=1): """ Given an ideal I of norm D, finds a generator α such that I = O(α,D) = Oα + OD
Optional: Enure the norm of the generator is coprime to the integer coprime_factor """ OI = I.left_order() D = ZZ(I.norm()) bound = ceil(4 * log(p))
gcd_norm = coprime_factor * D**2
# Stop infinite loops. for _ in range(1000): α = sum([b * randint(-bound, bound) for b in I.basis()]) if gcd(ZZ(α.reduced_norm()), gcd_norm) == D: assert I == OI * α + OI * D return α raise ValueError(f"Cannot find a good α for D = {D}, I = {I}, n(I) = {D}")
def pushforward_ideal(O0, O1, I, Iτ): """ Input: Ideal I left order O0 Connecting ideal Iτ with left order O0 and right order O1 Output The ideal given by the pushforward [Iτ]_* I """ assert I.left_order() == O0 assert Iτ.left_order() == O0 assert Iτ.right_order() == O1
N = ZZ(I.norm()) Nτ = ZZ(Iτ.norm())
K = I.intersection(O1 * Nτ) α = ideal_generator(K) return O1 * N + O1 * (α / Nτ)
FLAG = "aliyunctf{REDACTED}"
ells = [*primes(3, 128), 163]p = 4*prod(ells)-1B = QuaternionAlgebra(-1, -p)i,j,k = B.gens()O0 = B.quaternion_order([1, i, (i+j)/2, (1+k)/2])
io = process(["sage", "task.sage"])io.sendline(b"[0]")
io.recvuntil(b"Oa: ")Oa_str = io.recvline().strip().decode()io.recvuntil(b"Ob: ")Ob_str = io.recvline().strip().decode()
Oa = B.quaternion_order(sage_eval(Oa_str, locals={"i":i,"j":j,"k":k}))Ob = B.quaternion_order(sage_eval(Ob_str, locals={"i":i,"j":j,"k":k}))
I, _ = make_cyclic(O0*Oa)J, _ = make_cyclic(O0*Ob)
U = pushforward_ideal(O0, J.right_order(), I, J)
serial = ""basis = U.right_order().basis()for b in basis: for c in b.coefficient_tuple(): serial += str(c) + " "serial = serial.strip()
io.sendline(serial)io.interactive()