718 lines
21 KiB
C
718 lines
21 KiB
C
|
/*
|
||
|
* Copyright (C) 2008 Apple Inc. All rights reserved.
|
||
|
*
|
||
|
* This document is the property of Apple Inc.
|
||
|
* It is considered confidential and proprietary.
|
||
|
*
|
||
|
* This document may not be reproduced or transmitted in any form,
|
||
|
* in whole or in part, without the express written permission of
|
||
|
* Apple Inc.
|
||
|
*/
|
||
|
#include <string.h>
|
||
|
#include "AppleS5L8900XPKE-hardware.h"
|
||
|
|
||
|
|
||
|
#define AppleS5L8900XPKE_hardware_DEBUG 0
|
||
|
#undef absolutetime_to_nanoseconds
|
||
|
|
||
|
#if AppleS5L8900XPKE_hardware_DEBUG
|
||
|
static void debug_data(uint8_t *data, size_t length)
|
||
|
{
|
||
|
uint32_t i=0;
|
||
|
printf("data %p,%d\n" , data, length);
|
||
|
for (i = 0; i < length; i++) {
|
||
|
if( !(i%256) )
|
||
|
printf("Seg : %p \n", data);
|
||
|
printf("%02X, ", *(data + i));
|
||
|
if ((i % 16) == 15)
|
||
|
printf("\n");
|
||
|
}
|
||
|
printf("\n");
|
||
|
}
|
||
|
#define debug(fmt, args...) printf("AppleS5L8900XPKE-hardware::%s: " fmt "\n", __FUNCTION__ , ##args)
|
||
|
#else
|
||
|
#define debug(fmt, args...)
|
||
|
#define debug_data(data, length)
|
||
|
#endif
|
||
|
|
||
|
|
||
|
#if IN_KERNEL
|
||
|
extern uint64_t iodelay_time[3];
|
||
|
static uint64_t delay_time;
|
||
|
#endif
|
||
|
|
||
|
|
||
|
uint32_t
|
||
|
PkeSetMode(uint32_t uSegSize)
|
||
|
{
|
||
|
uint32_t uWriteVal;
|
||
|
|
||
|
switch (uSegSize) {
|
||
|
case PKE_SEG_SIZE_256:
|
||
|
uWriteVal = PKE_REGVAL_SEG_SIZE_SEGSIZE_256;
|
||
|
break;
|
||
|
|
||
|
case PKE_SEG_SIZE_128:
|
||
|
uWriteVal = PKE_REGVAL_SEG_SIZE_SEGSIZE_128;
|
||
|
break;
|
||
|
|
||
|
case PKE_SEG_SIZE_64:
|
||
|
uWriteVal = PKE_REGVAL_SEG_SIZE_SEGSIZE_64;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
uWriteVal = 0;
|
||
|
debug("error setting segment size/mode\n");
|
||
|
}
|
||
|
|
||
|
/* ! write SEG_SIZE and MODE_ID */
|
||
|
return (uWriteVal | PKE_REGMASK_SEG_SIZE_MUSTBEONE);
|
||
|
}
|
||
|
|
||
|
uint32_t
|
||
|
PkeGetSegmentCount(uint32_t uSegSize)
|
||
|
{
|
||
|
/* return segment total number */
|
||
|
switch (uSegSize) {
|
||
|
case PKE_SEG_SIZE_256:
|
||
|
return PKE_SEG_NUM_CASE256;
|
||
|
case PKE_SEG_SIZE_128:
|
||
|
return PKE_SEG_NUM_CASE128;
|
||
|
case PKE_SEG_SIZE_64:
|
||
|
return PKE_SEG_NUM_CASE64;
|
||
|
default:
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static uint32_t get_precision(uint32_t uBitLen)
|
||
|
{
|
||
|
uint32_t uPrecision;
|
||
|
|
||
|
if (uBitLen <= PKE_LEN_BIT_512) {
|
||
|
uPrecision = PKE_SIZE_PREC_0;
|
||
|
} else if ((uBitLen <= PKE_LEN_BIT_1024) && (uBitLen != PKE_LEN_BIT_672) && (uBitLen != PKE_LEN_BIT_864)) {
|
||
|
uPrecision = PKE_SIZE_PREC_1;
|
||
|
} else if ((uBitLen <= PKE_LEN_BIT_1536) && (uBitLen != PKE_LEN_BIT_1280) && (uBitLen != PKE_LEN_BIT_1408)) {
|
||
|
uPrecision = PKE_SIZE_PREC_2;
|
||
|
} else {
|
||
|
uPrecision = PKE_SIZE_PREC_3;
|
||
|
}
|
||
|
return uPrecision;
|
||
|
}
|
||
|
|
||
|
static uint32_t get_chunkSize(uint32_t uBitLen, uint32_t uPrecision)
|
||
|
{
|
||
|
uint32_t uDiv, uChunk;
|
||
|
|
||
|
switch (uPrecision) {
|
||
|
case PKE_SIZE_PREC_0:
|
||
|
if (uBitLen < PKE_LEN_BIT_128) {
|
||
|
uBitLen = PKE_LEN_BIT_128;
|
||
|
}
|
||
|
uDiv = uBitLen >> 5;
|
||
|
uChunk = uDiv;
|
||
|
if (uBitLen - (uChunk << 5)) {
|
||
|
uDiv++;
|
||
|
}
|
||
|
uChunk = --uDiv;
|
||
|
break;
|
||
|
|
||
|
case PKE_SIZE_PREC_1:
|
||
|
uDiv = uBitLen >> 6;
|
||
|
uChunk = uDiv;
|
||
|
if (uBitLen - (uChunk << 6)) {
|
||
|
uDiv++;
|
||
|
}
|
||
|
uChunk = --uDiv;
|
||
|
break;
|
||
|
|
||
|
case PKE_SIZE_PREC_2:
|
||
|
uDiv = (uBitLen / 3) >> 5;
|
||
|
uChunk = uDiv;
|
||
|
if (uBitLen - ((uChunk * 3) << 5)) {
|
||
|
uDiv++;
|
||
|
}
|
||
|
uChunk = --uDiv;
|
||
|
break;
|
||
|
|
||
|
case PKE_SIZE_PREC_3:
|
||
|
uDiv = uBitLen >> 7;
|
||
|
uChunk = uDiv;
|
||
|
if (uBitLen - (uChunk << 7)) {
|
||
|
uDiv++;
|
||
|
}
|
||
|
uChunk = --uDiv;
|
||
|
break;
|
||
|
default:
|
||
|
debug("Unsupported precision value: %u", uPrecision);
|
||
|
uChunk = 0;
|
||
|
}
|
||
|
return (((uChunk << 3) + 8) << 2);
|
||
|
}
|
||
|
|
||
|
static uint32_t
|
||
|
config_pke_key_len(uint32_t uBitLen)
|
||
|
{
|
||
|
uint32_t uDiv, uChunk, uPrecision;
|
||
|
|
||
|
/* 1. Get Precision */
|
||
|
uPrecision = get_precision(uBitLen);
|
||
|
|
||
|
/* 2. Get chunck size */
|
||
|
uChunk = get_chunkSize(uBitLen, uPrecision);
|
||
|
|
||
|
/* 3. Set precision and chunksize */
|
||
|
uDiv = ((uChunk >> 2) - 8) | uPrecision;
|
||
|
|
||
|
#if IN_KERNEL
|
||
|
//Set the IODelay time based on uBitLen.
|
||
|
if (uBitLen <= PKE_LEN_BIT_512) {
|
||
|
delay_time = iodelay_time[0];
|
||
|
}
|
||
|
else if (uBitLen <= PKE_LEN_BIT_1024) {
|
||
|
delay_time = iodelay_time[1];
|
||
|
}
|
||
|
else {
|
||
|
delay_time = iodelay_time[2];
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
return uDiv;
|
||
|
}
|
||
|
|
||
|
static inline uint32_t
|
||
|
RSAPkeSetInOut(uint32_t uDest, uint32_t uSrc1, uint32_t uSrc2)
|
||
|
{
|
||
|
uint32_t uPosition = 0;
|
||
|
|
||
|
/*! Set segment ID of src and dest */
|
||
|
uPosition = ((uSrc1 << PKE_REGSHIFT_SEG_ID_A_SEG_ID)
|
||
|
| (uSrc2 << PKE_REGSHIFT_SEG_ID_B_SEG_ID)
|
||
|
| (uDest << PKE_REGSHIFT_SEG_ID_S_SEG_ID));
|
||
|
|
||
|
return uPosition;
|
||
|
}
|
||
|
|
||
|
static inline bool
|
||
|
RSAPkeRun(volatile struct pke_regs *registers, uint32_t uCmdVal)
|
||
|
{
|
||
|
/*! Run PKE */
|
||
|
registers->pke_start = uCmdVal;
|
||
|
|
||
|
#if IN_KERNEL
|
||
|
IODelay(delay_time);
|
||
|
#endif
|
||
|
|
||
|
do {
|
||
|
if (!(registers->pke_start & PKE_REGMASK_START_EXEC_ON))
|
||
|
return true;
|
||
|
} while (1);
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
|
||
|
static bool shift_left(uint32_t *data, uint32_t data_size)
|
||
|
{
|
||
|
int prev_carry = 0, cur_carry = 0;
|
||
|
uint32_t itr = 0;
|
||
|
|
||
|
for (itr=0; itr < (data_size/sizeof(uint32_t)); itr++) {
|
||
|
cur_carry = data[itr] >> 31;
|
||
|
data[itr] = (data[itr] << 1) + prev_carry;
|
||
|
prev_carry = cur_carry;
|
||
|
}
|
||
|
return prev_carry;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int32_t highestSetBit(uint32_t *data1, uint32_t data_size)
|
||
|
{
|
||
|
int32_t ix = data_size/sizeof(uint32_t) - 1; // Most significant word.
|
||
|
|
||
|
while( ix>=0 && data1[ix]==0) {
|
||
|
ix--;
|
||
|
}
|
||
|
|
||
|
if(ix < 0)
|
||
|
return -1; //mods is zero.
|
||
|
|
||
|
uint32_t msNZword = data1[ix];
|
||
|
uint32_t b = ((ix+1) * sizeof(uint32_t) * DATA_BYTE_TO_BIT) - 1 ;
|
||
|
// Now find the highest set bit.
|
||
|
uint32_t mask = 0x80000000;
|
||
|
while ((msNZword & mask) == 0) {
|
||
|
mask >>= 1;
|
||
|
b--;
|
||
|
}
|
||
|
return b;
|
||
|
}
|
||
|
|
||
|
|
||
|
static bool is_greater(uint32_t *data1, uint32_t *data2, uint32_t data_size)
|
||
|
{
|
||
|
uint32_t itr = data_size/sizeof(uint32_t) - 1; //start at the last word.
|
||
|
|
||
|
do {
|
||
|
if(data1[itr] > data2[itr])
|
||
|
return true;
|
||
|
else if(data1[itr] < data2[itr])
|
||
|
return false;
|
||
|
//check next word;
|
||
|
}while(itr--);
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/* Do data1 = data1 - data2 */
|
||
|
static uint32_t sub(uint32_t *data1, uint32_t *data2, uint32_t data_size)
|
||
|
{
|
||
|
uint32_t borrow = 0;
|
||
|
uint32_t itr = 0;
|
||
|
|
||
|
for(itr=0; itr < (data_size/sizeof(uint32_t)); itr++) {
|
||
|
if(borrow) {
|
||
|
if(data1[itr] != 0)
|
||
|
borrow = 0; //No need to borrow again.
|
||
|
data1[itr] -= 1;
|
||
|
}
|
||
|
|
||
|
if( data1[itr] < data2[itr] )
|
||
|
borrow = 1;
|
||
|
|
||
|
data1[itr] -= data2[itr];
|
||
|
}
|
||
|
if(borrow)
|
||
|
return 1;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*Do data1 = data1 + data2 */
|
||
|
static uint32_t add(uint32_t *data1, uint32_t *data2, uint32_t data_size)
|
||
|
{
|
||
|
uint32_t temp;
|
||
|
|
||
|
int carry = 0;
|
||
|
uint32_t itr = 0;
|
||
|
|
||
|
for(itr=0; itr < (data_size/sizeof(uint32_t)); itr++) {
|
||
|
temp = data1[itr] + data2[itr] + carry;
|
||
|
if(temp < data1[itr] || (temp == data1[itr] && carry))
|
||
|
carry = 1;
|
||
|
else
|
||
|
carry = 0;
|
||
|
data1[itr] = temp;
|
||
|
}
|
||
|
return carry;
|
||
|
}
|
||
|
|
||
|
// The src buffer is assumed to be word aligned at this point.
|
||
|
// The hardware register is 4 byte aligned.
|
||
|
// The data is in little Endian.
|
||
|
static inline void copy_into_segment(volatile uint8_t *memory, uint32_t segment_id, uint32_t segment_size, uint8_t *data, uint32_t length)
|
||
|
{
|
||
|
volatile uint8_t *dst = memory + segment_id * segment_size;
|
||
|
uint32_t ii=0;
|
||
|
|
||
|
//Make sure the buffer is word aligned.
|
||
|
//assert((data & 3) == 0); //We don't assert in iBoot. The only interface makes sure we have aligned buffer.
|
||
|
|
||
|
uint32_t newLength = length & ~3 ;
|
||
|
//Copy words.
|
||
|
for(ii=0; ii<(newLength/4); ii++)
|
||
|
*((uint32_t*)dst + ii) = *((uint32_t*)data + ii);
|
||
|
|
||
|
//See if you have less than a word left to copy.
|
||
|
if(length & 3) { //have less than 4 bytes left to copy.
|
||
|
uint8_t wordData[4] = {0,0,0,0};
|
||
|
uint8_t *bytesLeft = data + newLength;
|
||
|
for(ii=0; ii<(length & 3); ii++) {
|
||
|
wordData[ii] = *(bytesLeft + ii);
|
||
|
}
|
||
|
*((uint32_t*)(dst+newLength)) = *((uint32_t*)wordData);
|
||
|
}
|
||
|
|
||
|
//zero fill the rest of the segment.
|
||
|
uint32_t bytesCopied = ((length+3) & ~3);
|
||
|
uint32_t left = segment_size - bytesCopied;
|
||
|
if(left) {
|
||
|
uint32_t *dest = (uint32_t *)(dst + bytesCopied);
|
||
|
for(ii=0; ii<(left/4); ii++)
|
||
|
*(dest+ii) = 0x00000000;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Calculate R0 in software.
|
||
|
rzero has memory allocated of size modulus_length and is zero filled.
|
||
|
*/
|
||
|
bool rsa_cal_R0(uint32_t * const mods, uint32_t modulus_length, uint32_t *rzero)
|
||
|
{
|
||
|
debug("called");
|
||
|
|
||
|
uint32_t precision = get_precision(modulus_length*DATA_BYTE_TO_BIT);
|
||
|
uint32_t chunk_size = get_chunkSize(modulus_length*DATA_BYTE_TO_BIT, precision);
|
||
|
|
||
|
//precision is 1,2,3,4 instead of 0,1,2,3
|
||
|
precision += 1;
|
||
|
|
||
|
/* Compute 2^b */
|
||
|
uint32_t power = (chunk_size / 16) + 1;
|
||
|
if (precision == 3)
|
||
|
power = power * 3;
|
||
|
power = power + (precision * (chunk_size + 16));
|
||
|
|
||
|
//compute the most significant bit set in the modulus.
|
||
|
int32_t b = highestSetBit((uint32_t*)mods, modulus_length);
|
||
|
if(b == -1)
|
||
|
return false;
|
||
|
|
||
|
//store 2^b in rzero.
|
||
|
bzero(rzero, modulus_length);
|
||
|
rzero[(b/32)] = 1 << (b % 32);
|
||
|
|
||
|
/* Calculate (2^(power-b) * 2^b) mod M */
|
||
|
uint32_t itr;
|
||
|
for(itr=0; itr<(power-b); itr++) {
|
||
|
bool carry = shift_left(rzero, modulus_length); //Shift left.
|
||
|
if (carry || is_greater(rzero, mods, modulus_length))
|
||
|
sub(rzero, (uint32_t*)mods, modulus_length);
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Compute the value of R^2 using the hardware.
|
||
|
The value of rzero (R0) is calculated in softare and passed into the function
|
||
|
The hardware should be reset before calling this function.
|
||
|
The value of R^2 is returned thru' rsquare (memory for this is allocated before calling this function and of modulus_length size)
|
||
|
*/
|
||
|
static bool rsa_cal_R2modM(volatile struct pke_regs *registers, volatile uint8_t *memory, uint32_t seg_size, uint32_t * const mods, uint32_t mods_length, uint32_t *rsquare, uint32_t *r2_segid)
|
||
|
{
|
||
|
debug("called");
|
||
|
|
||
|
if(!rsquare) //Need rzero (which should be in rsquare) to proceed
|
||
|
return false;
|
||
|
|
||
|
//Set the key length;
|
||
|
registers->pke_key_len = config_pke_key_len(mods_length*DATA_BYTE_TO_BIT);
|
||
|
//Set segment size;
|
||
|
registers->pke_seg_size = PkeSetMode(seg_size);
|
||
|
registers->pke_seg_sign = 0;
|
||
|
registers->pke_seg_id = 0;
|
||
|
|
||
|
/* Use hardware to calculate R1 thru' Rn and finally R^2 */
|
||
|
//Load modulus always into seg0.
|
||
|
copy_into_segment(memory, mod_segid, seg_size, (uint8_t*)mods, mods_length);
|
||
|
|
||
|
//based on seg_size compute the seg_ids to store the (2^(power-b) * 2^b) mod M
|
||
|
// and the final value of R^2
|
||
|
uint32_t rzero_segid, rsq_segid;
|
||
|
switch (seg_size) {
|
||
|
case PKE_SEG_SIZE_256:
|
||
|
rzero_segid = PKE_SEG_ID_05;
|
||
|
rsq_segid = PKE_SEG_ID_06;
|
||
|
break;
|
||
|
case PKE_SEG_SIZE_128:
|
||
|
rzero_segid = PKE_SEG_ID_12;
|
||
|
rsq_segid = PKE_SEG_ID_14;
|
||
|
break;
|
||
|
case PKE_SEG_SIZE_64:
|
||
|
rzero_segid = PKE_SEG_ID_28;
|
||
|
rsq_segid = PKE_SEG_ID_29;
|
||
|
break;
|
||
|
default:
|
||
|
//Should not happen.
|
||
|
return false;
|
||
|
}
|
||
|
//Load the rzero into the hardware.
|
||
|
copy_into_segment(memory, rzero_segid, seg_size, (uint8_t*)rsquare, mods_length);
|
||
|
|
||
|
uint32_t precision = (registers->pke_key_len&PKE_REGMASK_KEY_LEN_PREC_ID) + 1;
|
||
|
//Compute the number of hardware iterations based on precision.
|
||
|
uint32_t square_count;
|
||
|
if (precision == 1 || precision == 3)
|
||
|
square_count = 4;
|
||
|
else if (precision == 2)
|
||
|
square_count = 5;
|
||
|
else /* precision == 4 */
|
||
|
square_count = 6;
|
||
|
|
||
|
//Set pre load modulus on when using the hardware for the first time.
|
||
|
uint32_t start_mask = PKE_REGMASK_START_PLDM_ON | PKE_REGMASK_START_EXEC_ON;
|
||
|
uint32_t itr = 1;
|
||
|
uint32_t temp_segid = rsq_segid;
|
||
|
while (itr <= square_count) {
|
||
|
//setup seg_id pointers on the hardware
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(temp_segid, rzero_segid, rzero_segid);
|
||
|
//kick the hardware.
|
||
|
if(!RSAPkeRun(registers, start_mask))
|
||
|
return false;
|
||
|
itr++;
|
||
|
if(itr > square_count)
|
||
|
break;
|
||
|
//swap the segids and do the square again.
|
||
|
temp_segid += rzero_segid;
|
||
|
rzero_segid = temp_segid - rzero_segid;
|
||
|
temp_segid -= rzero_segid;
|
||
|
start_mask = PKE_REGMASK_START_EXEC_ON; //dont have to laod M again.
|
||
|
}
|
||
|
|
||
|
// Check the sign value of temp_segid and add modulus if negative.
|
||
|
uint32_t uRead, uMask;
|
||
|
uMask = (1 << temp_segid);
|
||
|
uRead = registers->pke_seg_sign & uMask;
|
||
|
//If negative then add modulus.
|
||
|
if(uRead) {
|
||
|
add( (uint32_t*)(memory+(temp_segid*seg_size)), (uint32_t*)mods, mods_length);
|
||
|
}
|
||
|
|
||
|
*r2_segid = temp_segid;
|
||
|
|
||
|
//Clean up the temp space used.
|
||
|
bzero((uint8_t*) memory+rzero_segid*seg_size, mods_length);
|
||
|
|
||
|
/* Reset the signbits */
|
||
|
registers->pke_seg_sign = 0;
|
||
|
memcpy(rsquare, (uint8_t *) memory+rsq_segid*seg_size, mods_length);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Calculate exponential.
|
||
|
The options parameter passed in indicates if R^2 is passed in or needs to be computed.
|
||
|
Options = 00 => Compute R^2 and don't care about caching. *rsquare is ignored.
|
||
|
Options = 01 => Compute R^2 and return it for caching. *rsquare has mem allocated and is of size modulus_length
|
||
|
Options = 10 => Use passed in R^2 value.
|
||
|
|
||
|
Reset the hardware before calling this function.
|
||
|
*/
|
||
|
bool rsa_cal_exp(void *dst, uint32_t *len, uint32_t options,
|
||
|
uint8_t *rsquare, uint32_t *rsquare_length,
|
||
|
uint8_t * const base, uint32_t base_length,
|
||
|
uint8_t * const expn, uint32_t expn_length,
|
||
|
uint8_t * const modulus, uint32_t modulus_length,
|
||
|
volatile struct pke_regs *registers, volatile uint8_t *memory)
|
||
|
{
|
||
|
debug("called");
|
||
|
|
||
|
/* Perform validation checks */
|
||
|
if(modulus_length % 4)
|
||
|
return false;
|
||
|
|
||
|
uint32_t segment_size, num_segments;
|
||
|
|
||
|
if (modulus_length <= (PKE_SEG_SIZE_64)) {
|
||
|
segment_size = PKE_SEG_SIZE_64;
|
||
|
} else if (modulus_length <= (PKE_SEG_SIZE_128)) {
|
||
|
segment_size = PKE_SEG_SIZE_128;
|
||
|
} else if (modulus_length <= (PKE_SEG_SIZE_256)) {
|
||
|
segment_size = PKE_SEG_SIZE_256;
|
||
|
} else
|
||
|
return false; /* key too long */
|
||
|
|
||
|
num_segments = PkeGetSegmentCount(segment_size);
|
||
|
|
||
|
if ( !num_segments ||
|
||
|
base_length > segment_size ||
|
||
|
expn_length > segment_size ||
|
||
|
modulus_length > segment_size)
|
||
|
return false;
|
||
|
|
||
|
/* Sane input, so proceed */
|
||
|
|
||
|
//rsquare segid must always be the last seg ID.
|
||
|
uint32_t rsq_segid = num_segments - 1;
|
||
|
|
||
|
if( (options&kIOPKEAcceleratorComputeRsquareMask) == 0 ) { //Compute rsquare.
|
||
|
if(!(options&kIOPKEAcceleratorPreProcessingDone)) { //R0 was not cacluated. should only happen in iBoot.
|
||
|
if(!rsquare){
|
||
|
//should not happen. The expectation is that memory for rsquare should be allocated.
|
||
|
return false;
|
||
|
}
|
||
|
rsa_cal_R0((uint32_t*)modulus, modulus_length, (uint32_t*)rsquare);
|
||
|
}
|
||
|
//Compute R^2
|
||
|
if(!rsa_cal_R2modM(registers, memory, segment_size, (uint32_t*)modulus, modulus_length, (uint32_t*)rsquare, &rsq_segid))
|
||
|
return false;
|
||
|
*rsquare_length = modulus_length;
|
||
|
}
|
||
|
else { //This would have been done while calculating R^2.
|
||
|
//Set the key length;
|
||
|
registers->pke_key_len = config_pke_key_len(modulus_length*DATA_BYTE_TO_BIT);
|
||
|
//Set segment size;
|
||
|
registers->pke_seg_size = PkeSetMode(segment_size);
|
||
|
/* Reset the signbits */
|
||
|
registers->pke_seg_sign = 0;
|
||
|
registers->pke_seg_id = 0;
|
||
|
|
||
|
//Load modulus into seg0;
|
||
|
copy_into_segment(memory, mod_segid, segment_size, modulus, modulus_length);
|
||
|
//copy the rsquare into the hardware segment.
|
||
|
memcpy((uint8_t*)memory+(rsq_segid*segment_size), rsquare, modulus_length);
|
||
|
}
|
||
|
|
||
|
/* Compute (base * r^2 mod M) using hardware */
|
||
|
//Copy base into tmp_segid.
|
||
|
copy_into_segment(memory, tmp_segid, segment_size, base, base_length);
|
||
|
//Setup the segid pointers.
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(iter_segid, tmp_segid, rsq_segid);
|
||
|
|
||
|
//Kick the hardware with pre load modulus bit set.
|
||
|
if(!RSAPkeRun(registers, (PKE_REGMASK_START_PLDM_ON | PKE_REGMASK_START_EXEC_ON)))
|
||
|
return false;
|
||
|
|
||
|
/* compute (1*R^2modM) using hardware. */
|
||
|
registers->pke_seg_size |= PKE_REGVAL_SEG_SIZE_FUNC_ID_A1; // A*1
|
||
|
//When PKE_REGVAL_SEG_SIZE_FUNC_ID_A1 is set, the hardware ignores the segid_B pointer
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(acum_segid, rsq_segid, rsq_segid);
|
||
|
if(!RSAPkeRun(registers, PKE_REGMASK_START_EXEC_ON))
|
||
|
return false;
|
||
|
|
||
|
// Clear the PKE_REGVAL_SEG_SIZE_FUNC_ID_A1 bit.
|
||
|
registers->pke_seg_size &= ~PKE_REGVAL_SEG_SIZE_FUNC_ID_A1;
|
||
|
|
||
|
/* Run the PKE */
|
||
|
uint32_t *exp_word = (uint32_t*)expn;
|
||
|
/* word aligned byte length */
|
||
|
uint32_t uExpLen = ((expn_length + (DATA_WORD_TO_BYTE - 1)) & (~0x3));
|
||
|
|
||
|
uint32_t last_word = exp_word[(uExpLen/DATA_WORD_TO_BYTE) - 1];
|
||
|
uint32_t uIndex = (DATA_WORD_TO_BYTE*DATA_BYTE_TO_BIT) - (highestSetBit(&last_word, DATA_WORD_TO_BYTE) + 1);
|
||
|
for (; uIndex < (uExpLen * DATA_BYTE_TO_BIT); uIndex++) {
|
||
|
unsigned int word;
|
||
|
unsigned int bit;
|
||
|
|
||
|
uint32_t offset = (uExpLen / DATA_WORD_TO_BYTE) - (uIndex / (DATA_WORD_TO_BYTE * DATA_BYTE_TO_BIT)) - 1;
|
||
|
word = exp_word[offset];
|
||
|
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(tmp_segid, acum_segid, acum_segid);
|
||
|
if(!RSAPkeRun(registers, PKE_REGMASK_START_EXEC_ON))
|
||
|
return false;
|
||
|
|
||
|
bit = word & (0x80000000 >> (uIndex % (DATA_WORD_TO_BYTE * DATA_BYTE_TO_BIT)));
|
||
|
|
||
|
if (bit) {
|
||
|
//debug("bit %d set", uIndex);
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(acum_segid, tmp_segid, iter_segid);
|
||
|
if(!RSAPkeRun(registers, PKE_REGMASK_START_EXEC_ON))
|
||
|
return false;
|
||
|
} else {
|
||
|
memcpy( (uint8_t *) memory + acum_segid * segment_size,
|
||
|
(uint8_t *) memory + tmp_segid * segment_size,
|
||
|
segment_size);
|
||
|
|
||
|
if (registers->pke_seg_sign & (0x01 << tmp_segid))
|
||
|
registers->pke_seg_sign |= (0x01 << acum_segid);
|
||
|
else
|
||
|
registers->pke_seg_sign &= ~(0x01 << acum_segid);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Factor out R^-1 from the result */
|
||
|
registers->pke_seg_size |= PKE_REGVAL_SEG_SIZE_FUNC_ID_A1; // A*1
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(tmp_segid, acum_segid, acum_segid);
|
||
|
if (!RSAPkeRun(registers, PKE_REGMASK_START_EXEC_ON))
|
||
|
return false;
|
||
|
registers->pke_seg_size &= ~PKE_REGVAL_SEG_SIZE_FUNC_ID_A1; // A*B
|
||
|
|
||
|
// Check the sign value of tmp_segid and add modulus if negative.
|
||
|
uint32_t uRead, uMask;
|
||
|
uMask = (1 << tmp_segid);
|
||
|
uRead = registers->pke_seg_sign & uMask;
|
||
|
//If negative then add modulus.
|
||
|
if(uRead) {
|
||
|
registers->pke_seg_sign &= ~uMask; //clear the sign.
|
||
|
add( (uint32_t*)(memory+(tmp_segid*segment_size)), (uint32_t*)modulus, modulus_length);
|
||
|
}
|
||
|
|
||
|
/* Copy data out */
|
||
|
memcpy(dst, (uint8_t*)memory+(tmp_segid*segment_size), modulus_length);
|
||
|
*len = modulus_length;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#if IN_KERNEL
|
||
|
|
||
|
bool internalTest(volatile struct pke_regs *registers, volatile uint8_t *memory, uint32_t mod_size, uint64_t *time_micro)
|
||
|
{
|
||
|
uint8_t buff[mod_size];
|
||
|
uint32_t segment_size;
|
||
|
|
||
|
if (mod_size <= (PKE_SEG_SIZE_64)) {
|
||
|
segment_size = PKE_SEG_SIZE_64;
|
||
|
} else if (mod_size <= (PKE_SEG_SIZE_128)) {
|
||
|
segment_size = PKE_SEG_SIZE_128;
|
||
|
} else if (mod_size <= (PKE_SEG_SIZE_256)) {
|
||
|
segment_size = PKE_SEG_SIZE_256;
|
||
|
} else
|
||
|
return false; /* key too long */
|
||
|
|
||
|
uint32_t rsq_segid;
|
||
|
switch (segment_size) {
|
||
|
case PKE_SEG_SIZE_256:
|
||
|
rsq_segid = PKE_SEG_ID_06;
|
||
|
break;
|
||
|
case PKE_SEG_SIZE_128:
|
||
|
rsq_segid = PKE_SEG_ID_14;
|
||
|
break;
|
||
|
case PKE_SEG_SIZE_64:
|
||
|
rsq_segid = PKE_SEG_ID_29;
|
||
|
break;
|
||
|
default:
|
||
|
//Should not happen.
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
registers->pke_key_len = config_pke_key_len(mod_size*DATA_BYTE_TO_BIT);
|
||
|
//Set segment size;
|
||
|
registers->pke_seg_size = PkeSetMode(segment_size);
|
||
|
/* Reset the signbits */
|
||
|
registers->pke_seg_sign = 0;
|
||
|
registers->pke_seg_id = 0;
|
||
|
|
||
|
debug("key: %x, reg_key: %x, modsize:%u, reg_segsize: %u", config_pke_key_len(mod_size*DATA_BYTE_TO_BIT), registers->pke_key_len, mod_size, registers->pke_seg_size);
|
||
|
|
||
|
//Load modulus into seg0;
|
||
|
memset(buff, 0xF0, mod_size);
|
||
|
copy_into_segment(memory, mod_segid, segment_size, buff, mod_size);
|
||
|
//Set R^2
|
||
|
memset(buff, 0xE5, mod_size);
|
||
|
copy_into_segment(memory, rsq_segid, segment_size, buff, mod_size);
|
||
|
//Set A
|
||
|
memset(buff, 0xD2, mod_size);
|
||
|
copy_into_segment(memory, iter_segid, segment_size, buff, mod_size);
|
||
|
|
||
|
|
||
|
//Setup the segid pointers.
|
||
|
registers->pke_seg_id = RSAPkeSetInOut(iter_segid, tmp_segid, tmp_segid);
|
||
|
|
||
|
uint64_t tasm0, tasm1, tasm2, asm_ns;
|
||
|
tasm0 = mach_absolute_time();
|
||
|
tasm1 = mach_absolute_time();
|
||
|
|
||
|
//Kick the hardware with pre load modulus bit set.
|
||
|
if(!RSAPkeRun(registers, (PKE_REGMASK_START_PLDM_ON | PKE_REGMASK_START_EXEC_ON)))
|
||
|
return false;
|
||
|
|
||
|
tasm2 = mach_absolute_time();
|
||
|
absolutetime_to_nanoseconds( (tasm2 + tasm0 - tasm1 - tasm1), &asm_ns);
|
||
|
//debug("Single multiply time: %lluns\n", asm_ns);
|
||
|
|
||
|
*time_micro = (asm_ns/1000) - 5;
|
||
|
|
||
|
//clear the hardware.
|
||
|
registers->pke_key_len = 0;
|
||
|
registers->pke_seg_size = 0;
|
||
|
registers->pke_seg_sign = 0;
|
||
|
registers->pke_seg_id = 0;
|
||
|
|
||
|
bzero((uint8_t*)memory+(tmp_segid*segment_size), mod_size);
|
||
|
bzero((uint8_t*)memory+(mod_segid*segment_size), mod_size);
|
||
|
bzero((uint8_t*)memory+(iter_segid*segment_size), mod_size);
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#endif
|