Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ OBJS=$(SOURCES:%=$(BUILD)/%.o)


ifeq ($(UNAME),Darwin)
CFLAGS+=-I/Library/Developer/CommandLineTools/SDKs/MacOSX15.2.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
FRAMEWORKS_DIR=/Library/Developer/CommandLineTools/SDKs/MacOSX15.2.sdk/System/Library/Frameworks/
ACCELERATE_HEADERS=$(FRAMEWORKS_DIR)/Accelerate.framework/Versions/A/Headers
CFLAGS+=-I$(ACCELERATE_HEADERS) -DACCELERATE_NEW_LAPACK
LDFLAGS+=-dynamiclib -framework Accelerate
else
LDFLAGS+=-shared
Expand Down
13 changes: 11 additions & 2 deletions scripts/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
from pathlib import Path
import time

import numpy as np
from transformers import AutoTokenizer
Expand All @@ -27,6 +28,9 @@ def test(tokenizer, model, state) -> str:

token_ids = tokenizer(text)["input_ids"]

# start timing here
start_time = time.time()

# feed in the prompt
pos = 0
logits = None
Expand All @@ -37,18 +41,23 @@ def test(tokenizer, model, state) -> str:

output_tokens = []

prompt_time = time.time() - start_time

# sample starting with last token of prompt
last_output_token_id = np.argmax(logits)
output_tokens.append(last_output_token_id)
#
for _ in range(20):
for _ in range(64):
logits = forward(model, state, last_output_token_id, pos)
pos += 1
assert np.all(np.isfinite(logits))
print(logits)
last_output_token_id = np.argmax(logits)
output_tokens.append(last_output_token_id)

end_time = time.time() - start_time

print(f"Prompt time: {prompt_time:.4f}s")
print(f"Total time: {end_time:.4f}s")
print(tokenizer.decode(output_tokens, skip_special_tokens=True))


Expand Down
29 changes: 16 additions & 13 deletions src/matmul.c
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
#include "matmul.h"

void mm(const float *restrict A, const float *restrict B, float *restrict C, int M, int N, int K)
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#endif

#ifdef __APPLE__
void mva(const float *restrict A, const float *restrict x, const float *restrict b, float *restrict y, int M, int N)
{
for (int i = 0; i < M; ++i)
{
for (int j = 0; j < N; ++j)
{
float sum = 0.0;
for (int k = 0; k < K; ++k)
{
sum += A[i * K + k] * B[k * N + j];
}
C[i * N + j] = sum;
}
}
memcpy(y, b, sizeof(float) * M); // Copy b to y
cblas_sgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0f, A, N, x, 1, 1.0f, y, 1);
}

/**
 * Matrix-vector product y = A x, delegated to the Accelerate CBLAS routine.
 *
 * A is passed as a row-major, non-transposed M x N matrix whose leading
 * dimension is N; x and y are both walked with unit stride. Because the
 * scale applied to y is zero, the previous contents of y do not contribute
 * to the result.
 */
void mv(const float *restrict A, const float *restrict x, float *restrict y, int M, int N)
{
    const float alpha = 1.0f; /* scale applied to the A*x term */
    const float beta = 0.0f;  /* scale applied to the incoming y */

    cblas_sgemv(CblasRowMajor, CblasNoTrans, M, N, alpha, A, N, x, 1, beta, y, 1);
}

#else
void mva(const float *restrict A, const float *restrict x, const float *restrict b, float *restrict y, int M, int N)
{
for (int i = 0; i < M; ++i)
Expand All @@ -40,3 +41,5 @@ void mv(const float *restrict A, const float *restrict x, float *restrict y, int
}
}
}

#endif
9 changes: 0 additions & 9 deletions src/matmul.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
#pragma once

/**
* Perform matrix multiplication of two matrices A and B.
*
* @param A Pointer to the first matrix, with dimensions M x K.
* @param B Pointer to the second matrix, with dimensions K x N.
* @param C Pointer to the result matrix, with dimensions M x N.
*/
void mm(const float *restrict A, const float *restrict B, float *restrict C, int M, int N, int K);

/**
* Compute
* y = Ax + b
Expand Down