/* * Copyright (C) 2007 Apple Inc. All rights reserved. * * This document is the property of Apple Inc. * It is considered confidential and proprietary. * * This document may not be reproduced or transmitted in any form, * in whole or in part, without the express written permission of * Apple Inc. */ # $Copyright: # ---------------------------------------------------------------- # This confidential and proprietary software may be used only as # authorized by a licensing agreement from Samsung Limited # (C) COPYRIGHT 1999,2001,2002 Samsung Limited # ALL RIGHTS RESERVED # The entire notice above must be reproduced on all authorized # copies and copies may only be made to the extent permitted # by a licensing agreement from Samsung Limited. # ---------------------------------------------------------------- # File: DevPke_R2modM.s # Tornado2 File: tor_cal_R2modM.s # Revision: 1.0 # 2004.05.18. V1.0 # - Calculate R^2 modM value # 2005.01.12 V1.1 # - ARM/Thumb interworking enabled in ARMv4(SC100) # # ---------------------------------------------------------------- # Assembler Tornado device driver # # adapted for as(1); comments are bumped to the next line. 
#include
# NOTE(review): the operand of the include directive above is missing in
# this copy of the file. PKE_BASE_ADDR is not defined anywhere in this
# file, so it presumably comes from that header - restore the original
# header name before assembling.

        .text

        .globl  _L_TCR_BASE
        .globl  _L_SEG_BASE

# ----------------------------------------------------------------
# Exported base-address words, loaded PC-relative by the code below.
#   _L_TCR_BASE : TORNADO2 control area   (original note: 0x3D000000)
#   _L_SEG_BASE : segment base address    (original note: 0x3D000800)
# ----------------------------------------------------------------
_L_TCR_BASE:    .long   PKE_BASE_ADDR
_L_SEG_BASE:    .long   PKE_BASE_ADDR + 0x0800

# ----------------------------------------------------------------
# Segment ids used for the calculation. Which triple is used depends
# on the segment size read from the control area:
#   SEG_4  .. SEG_6   : 1 segment = 256 byte
#   SEG_12 .. SEG_14  : 1 segment = 128 byte
#   SEG_27 .. SEG_29  : 1 segment =  64 byte
# ----------------------------------------------------------------
        .set    SEG_4,  4
        .set    SEG_5,  5
        .set    SEG_6,  6

        .set    SEG_12, 12
        .set    SEG_13, 13
        .set    SEG_14, 14

        .set    SEG_27, 27
        .set    SEG_28, 28
        .set    SEG_29, 29

# ----------------------------------------------------------------
# Literal words. Each label is named after the value it holds.
# ----------------------------------------------------------------
L_00000040:     .long   0x00000040
L_00004000:     .long   0x00004000
L_20000000:     .long   0x20000000

# segment operand operation config
L_04040006:     .long   0x04040006
L_05050004:     .long   0x05050004
L_05050006:     .long   0x05050006
L_06060005:     .long   0x06060005
L_0C0C000E:     .long   0x0C0C000E
L_0D0D000C:     .long   0x0D0D000C
L_0D0D000E:     .long   0x0D0D000E
L_0E0E000D:     .long   0x0E0E000D
L_1B1B001D:     .long   0x1B1B001D
L_1C1C001D:     .long   0x1C1C001D
L_1C1C001b:     .long   0x1C1C001B
L_1D1D001C:     .long   0x1D1D001C

# bit test mask (bit 31 set)
L_80000000:     .long   0x80000000

# BIT_LEN is the programmer-defined real key size.

###############################################
####  NAME         : PkeCalR2modM          ####
####  Tornado NAME : tor_cal_R2modM        ####
###############################################

        .globl  _PkeCalR2modM

#================================================
# Name         : PkeCalR2modM(int BIT_LEN)
# Tornado Name : tor_cal_R2modM(int BIT_LEN)
# Function     : 1. Clear the last 3 segment areas.
#                2. Calculate the R*2^((C/16)+1) modM value.
#                3. Calculate R*RmodM by using TORNADO
#                   and save this value in Seg6 or Seg14 or Seg29.
#                4. Clear the temp segment area.
# Requisite    : The modulus value must be copied from memory to Seg0
#                before using this function.
# Argument # int BIT_LEN : real Key size #================================================ _PkeCalR2modM: stmfd sp!, {r1-r12,lr} # reserved register values # r0 = BIT_LEN # r12= Loop_count(hw_size/32) #==================== # Clear CryptoRAM area (Seg4 ~ Seg6) when SEG_SIZE=0 #==================== # read SEG_SIZE ldr r1, _L_TCR_BASE ldr r2, [r1,$0x14] and r2, r2, $0xC0 mov r2, r2, LSR #6 cmp r2, #1 bhi clr_64 # 1 segment = 64byte blt clr_256 # 1 segment = 256byte ## else 1 segment =128byte ldr r1, =SEG_12 # Start position ldr r2,_L_SEG_BASE orr r1,r2,r1, lsl #7 ldr r2, =96 b start_clr clr_64: ldr r1, =SEG_27 # Start position ldr r2,_L_SEG_BASE orr r1,r2,r1, lsl #6 ldr r2, =48 b start_clr clr_256: ldr r1, =SEG_4 # Start position ldr r2,_L_SEG_BASE orr r1,r2,r1, lsl #8 ldr r2, =192 start_clr: mov r3,#0 mov r4,r3 mov r5,r3 mov r6,r3 mov r7,r3 mov r8,r3 mov r9,r3 mov r10,r3 clear_loop_cal: stmia r1!, {r3-r10} subs r2,r2,#8 bne clear_loop_cal #==================================================== # Calculate Montgomery constant #==================================================== # 1. Calculate R0=R*2^((c/16)+1)modM # 2. Calculate R1,R2, .. Rn # R[i]=(R[i-1]^2)*R^(-1)modM # n such that 2^n=p*16 (p=1 ==> n=4 # ... P=4 ==> n=6) # 3. 
If final output is neg, change it to pos one #===================================================== mov r1,r0 # save BIT_LEN in r1 #==================== # Calculate R0=R*2^((c/16)+1)modM #==================== # cal power=(p*(c+16)+(c/16)+1) ldr r0, _L_TCR_BASE ldr r0,[r0] and r2,r0,#0x03 add r2,r2,#1 # p value and r3,r0,#0x78 add r3,r3,#0x08 mov r3,r3, lsl #2 # c value mul r12,r2,r3 mov r12,r12,LSR #5 # r12= Loop_count(hw_size/32) add r4,r3,#16 mul r6,r4,r2 # r6=p*(c+16) mov r5,r3, LSR #4 add r5,r5,#1 # r5=(c/16)+1 mov r0, #3 mov r3,r5 cmp r2, #3 muleq r5,r3,r0 # precision = 0x10 case add r2,r6,r5 # store power in r2 # calculate b value ldr r0, _L_SEG_BASE add r3,r1,#31 mov r3,r3,LSR #5 # r3= (BIT_LEN+31)/32 mov r3,r3,lsl #2 sub r3,r3,#4 add r0,r0,r3 # r0 = MSW address of Modulus mov r7,r1 # r7= BIT_LEN add r7,r7,#0x1F mov r7,r7,LSR #5 mov r7,r7,lsl #5 # r7=(r7+31)/32*32 next_MSW: # Searching MSW cmp r7,#0 beq end_tor_calR2 ## if r2=0 then goto end_... ldr r4,[r0],#-4 cmp r4,#0 subeq r7,r7,#32 beq next_MSW ldr r5,L_80000000 next_MSB: ands r6,r4,r5 subeq r7,r7,#1 mov r5,r5,LSR #1 beq next_MSB # b = r7 sub r3,r2,r7 # r3=power - b # b is real bit size of Modulus # cal a=8-(power-b)%8 (1<= a <=8) mod8_loop: cmp r3,#8 blt cal_a sub r3,r3,#8 b mod8_loop cal_a: rsb r3,r3,#8 # r3=a # read SEG_SIZE ldr r4, _L_TCR_BASE ldr r8, [r4,#0x14] and r8, r8, #0xC0 mov r8, r8, LSR #6 cmp r8, #1 bhi cal_64 # 1 segment = 64byte blt cal_256 # 1 segment = 256byte ## else 1 segment =128byte ldr r4, =SEG_12 # Start position ldr r8,_L_SEG_BASE orr r4,r8,r4, lsl #7 b cal_segset_endup cal_64: ldr r4, =SEG_28 # Start position ldr r8,_L_SEG_BASE orr r4,r8,r4, lsl #6 b cal_segset_endup cal_256: ldr r4, =SEG_5 # Start position ldr r8,_L_SEG_BASE orr r4,r8,r4, lsl #8 cal_segset_endup: sub r5,r7,r3 mov r6,r5, LSR #5 add r4,r4,r6, lsl #2 # r4=SEG_4+((b-a)/32)*4 mod32_loop: cmp r5,#32 blt load_power sub r5,r5,#32 # r5=(b-a)%32 b mod32_loop load_power: mov r6,#0x01 mov r6,r6, lsl r5 str r6,[r4] # 
(0x01<<(BIT_LEN-a)%32) div_modM: #loop round value < ((power-b+a)/8) sub r2,r2,r7 add r2,r2,r3 mov r0,r2,LSR #3 # r0=(power-b+a)/8 # reserved reg : r0, r1=BIT_LEN, r12=hw_size/32 # available reg :r2-11 stmfd sp!, {r0} # available reg :r0,r2-r12 r1=store r12 value while shift_block #==================== # Start div_byte #==================== start_div_byte: mov r3,#9 # 9=8 count value +1 stmfd sp!,{r3} # !!tor_dev_byte 8 count value is saved in stack!! mov r1,r12 reload_shift: # read SEG_SIZE ldr r0, _L_TCR_BASE ldr r2, [r0,#0x14] and r2, r2, #0xC0 mov r2, r2, LSR #6 cmp r2, #1 bhi reload_64 # 1 segment = 64byte blt reload_256 # 1 segment = 256byte ## else 1 segment =128byte ldr r0, =SEG_12 # Start position ldr r2,_L_SEG_BASE orr r0,r2,r0, lsl #7 b reload_segset_endup reload_64: ldr r0, =SEG_28 # Start position ldr r2,_L_SEG_BASE orr r0,r2,r0, lsl #6 b reload_segset_endup reload_256: ldr r0, =SEG_5 # Start position ldr r2,_L_SEG_BASE orr r0,r2,r0, lsl #8 reload_segset_endup: mov r3,#0 # clr r3 mov r12,r1 ldmfd sp!,{r5} subs r5,r5,#1 stmfd sp!,{r5} beq end_div_byte cmp r12,#8 blt shift_one #==================== # Start shift left operation #==================== shift_block: # r0=address,r3=carry1,r2=carry2 # r4-r11=working reg,r12=shift loop counter #ldm r0, {r4-r11} ldmia r0, {r4,r5,r6,r7,r8,r9,r10,r11} mov r2,r4, LSR #31 # save carry add r4,r3,r4, lsl #1 # shift mov r3,r5, LSR #31 # save carry add r5,r2,r5, lsl #1 # shift mov r2,r6, LSR #31 # save carry add r6,r3,r6, lsl #1 # shift mov r3,r7, LSR #31 # save carry add r7,r2,r7, lsl #1 # shift mov r2,r8, LSR #31 # save carry add r8,r3,r8, lsl #1 # shift mov r3,r9, LSR #31 # save carry add r9,r2,r9, lsl #1 # shift mov r2,r10, LSR #31 # save carry add r10,r3,r10, lsl #1 # shift mov r3,r11, LSR #31 # save carry add r11,r2,r11, lsl #1 # shift stmia r0!,{r4-r11} sub r12,r12,#8 cmp r12,#8 bge shift_block shift_one: cmp r12,#0 beq cmp_carry1 ldr r4,[r0] mov r2,r4, LSR #31 add r4,r3,r4, lsl #1 mov r3,r2 str r4,[r0],#4 sub 
r12,r12,#1 b shift_one #==================== # Start compare operation #==================== cmp_carry1: cmp r3,#0 bne tor_sub greater_equal: # available reg:r2-r12 sub r0,r0 ,#4 # MSW address of power(0xE04XX) ldr r2, _L_SEG_BASE # Modulus pointer value and r3,r0,#0xFF add r2,r2,r3 # MSW address of Modulus(0xE00XX) test_loop: ldr r3,[r0],#-4 # r3=power value ldr r4,[r2],#-4 # r2=Modulus value cmp r3,r4 beq test_loop bhi tor_sub # pow>Mod # pow