Aliyun CTF 2025 Writeup - Sceleri's Blog

Played with Redbud and got third place. The challenges are really difficult and out of my distribution. I solved PRFCasino, LinearCasino, OhMyDH, softHash and hashgame and here’s the writeup.

softHash#

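Just a simulated-annealing-style random search: mutate random prefix tokens and keep the candidates whose hash is closest to the target hash.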

1
import os
2
import torch
3
from transformers import BertTokenizer
4
from sentence_transformers import SentenceTransformer
5
import random
6
from tqdm import tqdm
7
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
8

9
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
10

11
class NeuralHash():
    """Sign-bit hash over selected coordinates of a sentence embedding."""

    def __init__(self, model_path):
        # Embedding coordinates whose signs make up the hash, most
        # significant bit first.
        self.idxs = [2, 9, 10, 22, 27, 43, 47, 48, 60, 61, 63, 72, 73, 74, 85, 88, 93, 114, 131, 175, 193, 216, 220, 240, 248, 270, 279, 293, 298, 302, 306, 308, 324, 330, 338, 357, 358, 367, 383, 401, 405, 413, 416, 439, 441, 447, 450, 466, 471, 483, 485, 492, 500, 510, 516, 524, 525, 536, 540, 542, 547, 549, 551, 559, 573, 578, 593, 601, 608, 612, 614, 616, 622, 623, 625, 634, 638, 644, 655, 656, 682, 684, 686, 690, 691, 716, 734, 744, 756, 763, 766, 772, 777, 788, 797, 819, 823, 837, 851, 852, 859, 863, 875, 876, 879, 881, 883, 889, 898, 901, 934, 939, 941, 945, 957, 959, 963, 970, 983, 994, 995, 997, 999, 1000, 1001, 1011, 1014, 1022]
        self.model = SentenceTransformer(model_path)

    def embed(self, string):
        """Return the model's normalized embedding of ``string``."""
        return self.model.encode(string, normalize_embeddings=True)

    def hash(self, string):
        """Return the hash of ``string`` as a ``0x``-prefixed hex string.

        One bit per entry of ``self.idxs``: 1 when that embedding
        coordinate is strictly positive, 0 otherwise.
        """
        vec = self.embed(string)
        value = 0
        for position in self.idxs:
            # Shift in the next sign bit (first index ends up as the MSB).
            value = (value << 1) | int(vec[position] > 0)
        return hex(value)
24

25
def check_diff(str1, str2, hasher):
    """Return True when the hashes of the two strings differ in at most 6 bits.

    ``hasher.hash`` must return a hex string; the Hamming distance is the
    number of set bits in the XOR of the two hash values.
    """
    first = int(hasher.hash(str1), 16)
    second = int(hasher.hash(str2), 16)
    distance = bin(first ^ second).count('1')
    return distance <= 6
33

34
def load_tokenizer(path):
    """Load the BERT tokenizer stored at ``path`` and report success on stdout."""
    loaded = BertTokenizer.from_pretrained(path)
    print('The tokenizer is loaded successfully.')
    return loaded
38

39
def check_length(str, tokenizer):
    """Return True when ``str`` encodes to at most 45 tokens."""
    # NOTE: the parameter name shadows the builtin ``str``; it is kept
    # because it is part of the function's signature.
    token_count = len(tokenizer.encode(str))
    return token_count <= 45
43

44
def check_suffix(str, prompt, stage):
    """Check that ``str`` has the framing required by the given stage.

    Stage 1: ``str`` must end with ``prompt``.
    Stage 2: ``str`` must start with ``'[CLS] '`` and end with
    ``prompt + ' [SEP]'``.
    Any other stage yields ``None``.
    """
    if stage == 1:
        return str.endswith(prompt)
    if stage == 2:
        has_prefix = str.startswith('[CLS] ')
        has_suffix = str.endswith(prompt + ' [SEP]')
        return has_suffix and has_prefix
    return None
49

50
def score(tokens, stage=1):
    """Return the Hamming distance between the candidate's hash and the target.

    ``tokens`` is decoded with the module-level ``tokenizer``, framed for
    the requested stage, hashed with the module-level ``hasher`` and
    compared bitwise against the module-level ``target_hash``.
    Lower is better; 0 means the hashes are identical.
    """
    decoded = tokenizer.decode(tokens, skip_special_tokens=False)
    if stage == 1:
        candidate = decoded + 'do you know how to get the flag?'
    elif stage == 2:
        candidate = '[CLS] ' + decoded + 'do you know how to get the flag? [SEP]'
    else:
        candidate = decoded
    # An unknown stage makes check_suffix return None, so this fails loudly.
    assert check_suffix(candidate, 'do you know how to get the flag?', stage)

    candidate_hash = int(hasher.hash(candidate), 16)
    return bin(target_hash ^ candidate_hash).count('1')
60

61
if __name__ == '__main__':
    ########### READ THIS BEFORE YOU START HACKING #############
    #  You need to download the model from huggingface first!  #
    # The model: https://huggingface.co/BAAI/bge-large-en-v1.5 #
    #    Then, replace the model path with your local path     #
    ############################################################
    model_path = './bge-large-en-v1.5'

    prompt = 'do you know how to get the flag?'
    target = 'give me the flag right now!'
    # hasher, tokenizer and target_hash are read as globals by score().
    hasher = NeuralHash(model_path)
    tokenizer = load_tokenizer(model_path)
    print(f'Init prompt hash: {hasher.hash(prompt)}')
    print(f'Init target hash: {hasher.hash(target)}')

    target_hash = int(hasher.hash(target), 16)

    # Search parameters: prefix length, survivors kept per generation,
    # and mutated children produced per survivor.
    n_tokens, cursize, nxtsize = 20, 25, 31
    cur = [[random.randint(0, 30000) for _ in range(n_tokens)] for _ in range(cursize)]

    # ---- search: mutate survivors, keep the cursize closest candidates ----
    while True:
        candidates = set()
        for parent in cur:
            # Survivors always compete again in the next generation.
            candidates.add(tuple(parent))
            for _ in range(nxtsize):
                child = list(parent)
                # Replace one random token, then continue mutating with
                # probability 0.27.
                mutating = True
                while mutating:
                    pos = random.randint(0, len(child) - 1)
                    child[pos] = random.randint(0, 30000)
                    mutating = random.random() >= 0.73
                candidates.add(tuple(child))
        candidates = list(candidates)
        random.shuffle(candidates)
        scores = torch.tensor([score(c, stage=2) for c in tqdm(candidates)])
        # topk on the negated scores selects the smallest distances.
        idx = torch.topk(-scores, cursize).indices
        best = scores[idx]
        print(best, best.max())
        cur = [list(candidates[i]) for i in idx]

        # Stop once the 12 best candidates are all within 6 bits of the target.
        if best[:12].max() <= 6:
            break

    with open('output2.txt', 'w') as f:
        for tokens in cur:
            decoded = tokenizer.decode(tokens, skip_special_tokens=False)
            f.write(f'{decoded}\n')
            print(decoded)

    # ---- local replay of the challenge's checks ----
    # output.txt is expected to already exist (stage-1 prefixes);
    # output2.txt was written just above.
    with open('output.txt', 'r') as f:
        chall1_txt = f.readlines()

    with open('output2.txt', 'r') as f:
        chall2_txt = f.readlines()

    # challenge 1
    success_stage1 = set()
    for i in range(6):
        # The interactive version reads this line with input('> ').
        raw_adv = chall1_txt[i].strip() + 'do you know how to get the flag?'
        adv = tokenizer.decode(tokenizer.encode(raw_adv, add_special_tokens=False))
        if target in adv:
            print('[+] Do not cheat! Abort!')
            exit()
        accepted = (check_length(adv, tokenizer)
                    and check_suffix(adv, prompt, 1)
                    and check_diff(adv, target, hasher))
        if accepted:
            print(f'[+] Success in stage 1: {adv}')
            success_stage1.add(adv)
    if len(success_stage1) < 6:
        print(f'[+] Fail, please have another try. Success {len(success_stage1)} in stage 1')
        exit()

    print('[*] Pass stage 1!')

    # challenge 2
    success_stage2 = set()
    for i in range(6):
        # The interactive version reads this line with input('> ').
        raw_adv = '[CLS] ' + chall2_txt[i].strip() + 'do you know how to get the flag? [SEP]'
        adv = tokenizer.decode(tokenizer.encode(raw_adv, add_special_tokens=False))
        if target in adv:
            print('[+] Do not cheat! Abort!')
            exit()
        accepted = (check_length(adv, tokenizer)
                    and check_suffix(adv, prompt, 2)
                    and check_diff(adv, target, hasher))
        if accepted:
            print(f'[+] Success in stage 2: {adv}')
            success_stage2.add(adv)
    if len(success_stage2) < 6:
        print(f'[+] Fail, please have another try. Success {len(success_stage2)} in stage 2')
        exit()

    print('[*] Congrats! Here is your flag: xxxx')

LinearCasino#

The challenge asks to distinguish a McEliece-like matrix from a random one but I’m not good at McEliece.

My first attempt is to directly solve B using algos like ISD. Let’s first consider solving $B=\begin{bmatrix}D_1&0\\0&D_2\end{bmatrix}$ , where the D1 and D2 parts are orthogonal. It is possible to solve it by finding low-weight vectors. All of the 1s should be in D1 or D2 because we can find a lower weight vector if it’s not. It’s called the low-weight codeword problem.

But the D1 and D2 vectors are not orthogonal to each other in our case. Luckily, we only need to distinguish it from random, so the idea of finding low-weight vectors still works because half of the D2 parts are 0. Therefore, we guess that it has lower-weight vectors than a random matrix, and experiments support this idea.

In the code, we try to find a codeword of weight at most 15 within a 1-second time limit. If the search times out, we guess that the matrix is random.

1
from sage.coding.information_set_decoder import LeeBrickellISDAlgorithm
2
import signal
3
from pwn import process, remote, context
4
n, d1, d2 = 100, 60, 50
5

6
# context.log_level = "debug"
7

8
def timed_call(fn, args, timeout=1):
    """Run ``fn(*args)`` and abort it with ``TimeoutError`` after ``timeout`` seconds.

    The limit is enforced with a SIGALRM interval timer, so this only
    works on Unix and only when called from the main thread.

    Args:
        fn: callable to execute.
        args: positional arguments passed to ``fn``.
        timeout: time limit in seconds; fractional values are accepted.
            0 disables the limit.

    Returns:
        Whatever ``fn(*args)`` returns.

    Raises:
        TimeoutError: if ``fn`` is still running when the timer fires.
    """
    def handler(signum, frame):
        raise TimeoutError()

    # Remember the handler that was active so it can be put back afterwards;
    # otherwise our closure stays installed for the rest of the process.
    previous = signal.signal(signal.SIGALRM, handler)
    # setitimer (unlike alarm) supports sub-second limits.
    signal.setitimer(signal.ITIMER_REAL, timeout)
    try:
        return fn(*args)
    finally:
        # Always cancel the pending timer, even when fn raised.
        signal.setitimer(signal.ITIMER_REAL, 0)
        # signal.signal returns None for handlers not installed from Python;
        # those cannot be re-installed through this API.
        if previous is not None:
            signal.signal(signal.SIGALRM, previous)
18

19
def guess(M):
    """Return 1 if a low-weight codeword is found in time, otherwise 0.

    Builds the linear code generated by ``M`` and runs Lee-Brickell ISD
    against the zero word with decoding interval (1, 15). If the search
    finishes within the 1-second limit the answer is 1; if it times out
    the answer is 0.
    """
    def find_low_weight_word():
        code = codes.LinearCode(M)
        decoder = LeeBrickellISDAlgorithm(code, (1, 15))
        zero_word = vector(GF(2), [0] * 2 * n)
        return decoder.decode(zero_word)

    try:
        timed_call(find_low_weight_word, (), 1)
    except TimeoutError:
        return 0
    else:
        return 1
31

32
io = process(["sage", "task.sage"])

for i in range(100):
    # Each round prints the matrix as one big integer after the hat emoji.
    io.recvuntil("🎩".encode())
    mint = int(io.recvline().strip().decode())
    # Expand to a fixed-width bit string: (d1+d2) rows of 2n bits each.
    bit_string = bin(mint)[2:].zfill(2*n*(d1+d2))
    m_list = [int(bit) for bit in bit_string]

    M = matrix(GF(2), d1+d2, 2*n, m_list)
    decision = guess(M)
    print("Round", i, "Decision", decision)
    io.sendline(b"1" if decision else b"0")

io.interactive()

hashgame (哈基游)#

You can do two things with the index php:

inject something to the eval function
calculate the hash of a file with selected algorithm and print nothing

To me, the strangest thing is that we can choose the hash algorithm. So I checked the hash_algo list provided by PHP and found 3 different crc32 variants. We all know that crc32 is affine, and 3 crc32 values give 96 bits of information, while the flag has only 90 bits of randomness. Therefore, we can recover the flag from the hashes.

However, the initial problem is still unsolved: how to get the file hash? There’s no bypass or weakness of preg_match and it only allows letters, digits and $_ up to 5 chars. So it’s completely safe and I can’t print anything.

It was finally solved when I randomly sent cached_key and found an error traceback. When I send c=a$a, it parses the first a as a type notation and throws an error because of a type mismatch. In the traceback, it prints the function inputs, which gives me the file hash.

The rest is simple crypto so I’ll skip it. Check the code at hashgame.

PRFCasino#

The challenge asks to distinguish a cipher stream from random bytes.

The challenge uses CBC encryption, so we can only control the first block and the rest are random plaintext and ciphertext pairs. So it’s hard to apply any differential attack or special crafted plaintext.

After exclusion, I guess that maybe the encryption is not that bijective, especially the i*T+lrot(T,17).

How could it not be bijective? For example, if T<2**(64-17), it equals (2**17+i)*T. This extends to all T<2**64 if we view lrot(T,17) as (2**17)*T%(2**64-1), which gives i*T+lrot(T,17)=(2**17+i)*T%(2**64-1). It is not always exact because the sum can wrap around, but that happens at most once and only adds 1 to the result. If gcd(2**17+i,2**64-1)>1, the encryption is not bijective, because the +1 can’t make the distribution uniform.

We have found some non-bijective issues of the encryption, how can we identify it?

There are two lrot used in the encryption and we should focus on the second one, which is T+lrot(T,20). Similar to the analysis above, gcd(2**20+1,2**64-1)=17. If there’s no wraparound, (T+lrot(T,20))%17==0. So there’s some problem with mod 17.

In each iteration, L%17,R%17=(R-wraparound)%17,L%17, where wraparound happens at most twice. So after 30 rounds of addition the distribution of the wraparound is not uniform and we can recognize it. So we count $(L_{30}-L_0)\pmod{17}$ statistically and check the distribution.

1
from pwn import *
2
from Crypto.Util.number import *
3

4
io = remote('121.41.238.106', 56146)
5

6
def round(test_num=200):
    """Play one distinguishing round over the module-level ``io`` tube.

    Sends ``test_num + 1`` all-zero 16-byte blocks, reads the returned
    hex blob, and for every pair of consecutive 16-byte blocks tallies the
    difference of their left and right 8-byte halves modulo 17. Answers
    ``1`` when residues 8, 9 and 10 together occur more than
    ``test_num * 0.05`` times, otherwise ``0``.
    """
    # NOTE: this function name shadows the builtin ``round``.
    io.recvuntil("💵".encode())
    io.sendline(b"00"*16*(test_num+1))
    io.recvuntil("🎩".encode())

    ct = bytes.fromhex(io.recvline().strip().decode())

    # cnt[r] = number of half-block differences congruent to r mod 17.
    cnt = [0] * 17

    for i in range(test_num):
        current = ct[16*i:16*i+16]
        following = ct[16*(i+1):16*(i+1)+16]
        # Left half first, then right half, as big-endian integers.
        for lo, hi in ((0, 8), (8, 16)):
            a = int.from_bytes(current[lo:hi], 'big')
            b = int.from_bytes(following[lo:hi], 'big')
            cnt[(a - b) % 17] += 1

    biased = cnt[8]+cnt[9]+cnt[10] > test_num*0.05
    io.sendline(b"1" if biased else b"0")
30

31
# Play all 100 rounds with 100 block pairs each, then hand the
# connection over to the terminal.
rounds_left = 100
while rounds_left > 0:
    round(100)
    rounds_left -= 1

io.interactive()

OhMyDH#

A quaternion CSIDH. The best way to learn it is reading the preliminaries of papers.¹²³⁴⁵ This is the first time I understood why there are quaternions in SQISign. The most useful thing is the Deuring Correspondence (check Table 1 in SQISign²), and you can understand why the isogeny works on quaternions and what the code is doing.

In the quaternion setting, each ideal is an “isogeny” and it connects its left and right orders. What you can do on isogeny paths is also true for ideals by the Deuring Correspondence. Ideals feel more structured than curves, hence DH seems solvable on quaternions.

The most important algorithm stated in these papers is the KLPT algorithm¹. It looks equivalent to find a smooth isogeny path of small primes with given start and end curve. But that doesn’t help because in quaternion, we can do “isogeny” on any prime as long as you give me the ideal. We don’t need to map back to curve.

If you check the repository of SQISign-Sagemath, every function takes connecting ideal as input. However, the challenge didn’t give me the connecting ideal of $O$ and $O_a$ , so we need to figure out the connecting ideal first. The good news is in some papers they claim computing the connecting ideal of two orders is easy. But they just skipped it!!! Finally I find the algorithm in Section 3.2 of this paper.⁵

The algorithm for the next step is found in the blog of SQISign-Sagemath⁶, which gives a diagram that takes two ideals starting from $O_0$ and outputs their composition. This is exactly what we want for DH, so we just use that function to get the shared secret.

The whole steps are:

Find the connecting ideal $I_a$ of $O$ and $O_a$ .
Push forwards $I_a$ and $I_b$ to get the shared secret.

The final solution is quite simple and every function you need can be found in the SQISign-Sagemath repo, but it takes time to find the resources and understand the solution.

1
from ast import literal_eval
2
from pwn import process, remote
3

4
def ideal_basis_gcd(I):
    """
    Return the gcd of all coefficients of the ideal's basis
    when that basis is expressed in the basis of its left order.
    """
    ideal_matrix = I.basis_matrix()
    order_matrix = I.left_order().unit_ideal().basis_matrix()

    # Change of basis: rows are the ideal's generators in order coordinates.
    coords = ideal_matrix * order_matrix.inverse()
    row_gcds = (gcd(row) for row in coords)
    return gcd(row_gcds)
16

17
def make_cyclic(I, full=False):
    """
    Return ``(J, g)`` where g = ideal_basis_gcd(I) and J is I with
    the scalar factor g divided out (J is I itself when g == 1).

    With ``full=True`` and g != 1, J is additionally passed through
    ``remove_2_endo``.
    """
    g = ideal_basis_gcd(I)
    reduced = I
    if g != 1:
        print(f"DEBUG [make_cyclic]: Ideal is not cyclic, removing scalar factor: {g = }")
        reduced = I.scale(1/g)
        if full:
            # TODO: will remove_2_endo change g?
            # not an issue currently, as we don't
            # use this.
            # NOTE(review): remove_2_endo is not defined in this snippet.
            reduced = remove_2_endo(reduced)
    return reduced, g
36

37
def ideal_generator(I, coprime_factor=1):
    """
    Given an ideal I of norm D, find a generator
    α such that I = O(α,D) = Oα + OD

    Optional: Ensure the norm of the generator is coprime
    to the integer coprime_factor

    Raises ValueError when no suitable α is found in 1000 random tries.
    """
    left_order = I.left_order()
    D = ZZ(I.norm())
    # Coefficient range for the random combinations; reads the global prime p.
    bound = ceil(4 * log(p))

    gcd_norm = coprime_factor * D**2

    # Stop infinite loops.
    attempts = 0
    while attempts < 1000:
        attempts += 1
        candidate = sum([randint(-bound, bound) * b for b in I.basis()])
        if gcd(ZZ(candidate.reduced_norm()), gcd_norm) != D:
            continue
        assert I == left_order * candidate + left_order * D
        return candidate
    raise ValueError(f"Cannot find a good α for D = {D}, I = {I}, n(I) = {D}")
58

59
def pushforward_ideal(O0, O1, I, Iτ):
    """
    Input:  Ideal I with left order O0
            Connecting ideal Iτ with left order O0
            and right order O1
    Output: The ideal given by the pushforward [Iτ]_* I
    """
    # Both ideals must start at O0, and Iτ must end at O1.
    assert I.left_order() == O0
    assert Iτ.left_order() == O0
    assert Iτ.right_order() == O1

    norm_I = ZZ(I.norm())
    norm_τ = ZZ(Iτ.norm())

    intersection = I.intersection(O1 * norm_τ)
    generator = ideal_generator(intersection)
    scaled_generator = generator / norm_τ
    return O1 * norm_I + O1 * scaled_generator
76

77
FLAG = "aliyunctf{REDACTED}"

# Public parameters: p = 4 * (product of the listed odd primes) - 1.
# p is also read as a global by ideal_generator.
ells = [*primes(3, 128), 163]
p = 4*prod(ells)-1
B = QuaternionAlgebra(-1, -p)
i,j,k = B.gens()
O0 = B.quaternion_order([1, i, (i+j)/2, (1+k)/2])

io = process(["sage", "task.sage"])
io.sendline(b"[0]")

io.recvuntil(b"Oa: ")
Oa_str = io.recvline().strip().decode()
io.recvuntil(b"Ob: ")
Ob_str = io.recvline().strip().decode()

# The orders arrive as text in terms of i, j, k; evaluate them inside B.
# NOTE(review): sage_eval evaluates whatever the peer sent. That is fine
# against the local task.sage, but do not point this at an untrusted server.
quaternion_names = {"i": i, "j": j, "k": k}
Oa = B.quaternion_order(sage_eval(Oa_str, locals=quaternion_names))
Ob = B.quaternion_order(sage_eval(Ob_str, locals=quaternion_names))

# Step 1: connecting ideals from O0 to Oa and Ob, with scalar factors removed.
I, _ = make_cyclic(O0*Oa)
J, _ = make_cyclic(O0*Ob)

# Step 2: push I forward along J; the right order of the result is the
# shared secret.
U = pushforward_ideal(O0, J.right_order(), I, J)

# Serialize every coefficient of every basis element, space separated.
basis = U.right_order().basis()
serial = " ".join(str(c) for b in basis for c in b.coefficient_tuple())

# Send bytes explicitly instead of relying on pwntools' implicit str handling.
io.sendline(serial.encode())
io.interactive()

softHash#

LinearCasino#

hashgame (哈基游)#

PRFCasino#

OhMyDH#

Footnotes#