Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modifications to support GloVe on Windows #12

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Makefile.mak
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# NMAKE makefile for building the GloVe tools with the Microsoft C compiler.
#
# To enable nmake and cl locate and run "vcvarsall.bat amd64"
# typically in "C:\Program Files (x86)\Microsoft Visual Studio 99.9\VC"
# see: https://msdn.microsoft.com/en-us/library/x4d2c09s.aspx

CC=CL
# Optimization flags may not be the best for every machine.
# /arch:AVX2 requires a CPU with AVX2 support; see the /arch flag here:
# https://msdn.microsoft.com/en-us/library/jj620901.aspx
# cl options are case-sensitive: /fp:fast selects the fast floating-point
# model, whereas /Fp names a precompiled header file.
CFLAGS=/TC /MT /W3 /D_CRT_SECURE_NO_WARNINGS /Ox /fp:fast /arch:AVX2
BUILDDIR=build
SRCDIR=src

# Default target: create the output directory, then build every tool.
all: dir vocab_count cooccur shuffle glove

# Create the output directory if it is not already present.
dir :
	IF exist $(BUILDDIR) ( echo $(BUILDDIR) exists ) ELSE ( MD $(BUILDDIR))

# Each tool is a single translation unit compiled and linked in one cl call;
# the object file and the executable both go to $(BUILDDIR).
glove : $(SRCDIR)/glove.c
	$(CC) $(SRCDIR)/glove.c /Fo$(BUILDDIR)/glove.obj $(CFLAGS) /link /OUT:$(BUILDDIR)/glove.exe
shuffle : $(SRCDIR)/shuffle.c
	$(CC) $(SRCDIR)/shuffle.c /Fo$(BUILDDIR)/shuffle.obj $(CFLAGS) /link /OUT:$(BUILDDIR)/shuffle.exe
cooccur : $(SRCDIR)/cooccur.c
	$(CC) $(SRCDIR)/cooccur.c /Fo$(BUILDDIR)/cooccur.obj $(CFLAGS) /link /OUT:$(BUILDDIR)/cooccur.exe
vocab_count : $(SRCDIR)/vocab_count.c
	$(CC) $(SRCDIR)/vocab_count.c /Fo$(BUILDDIR)/vocab_count.obj $(CFLAGS) /link /OUT:$(BUILDDIR)/vocab_count.exe

# Remove the output directory; the guard keeps clean from failing when
# nothing has been built yet.
clean:
	IF exist $(BUILDDIR) RD /Q /S $(BUILDDIR)
2 changes: 1 addition & 1 deletion src/cooccur.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#define SEED 1159241
#define HASHFN bitwisehash

static const int MAX_STRING_LENGTH = 1000;
#define MAX_STRING_LENGTH 1000
typedef double real;

typedef struct cooccur_rec {
Expand Down
33 changes: 26 additions & 7 deletions src/glove.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,15 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <pthread.h>
#ifndef _MSC_VER
# include <pthread.h>
typedef pthread_t thread_type;
#else
# include <windows.h>
typedef HANDLE thread_type;
#define fseeko _fseeki64
#define ftello _ftelli64
#endif

#define _FILE_OFFSET_BITS 64
#define MAX_STRING_LENGTH 1000
Expand Down Expand Up @@ -64,12 +72,12 @@ void initialize_parameters() {
vector_size++; // Temporarily increment to allocate space for bias

/* Allocate space for word vectors and context word vectors, and corresponding gradsq */
a = posix_memalign((void **)&W, 128, 2 * vocab_size * (vector_size + 1) * sizeof(real)); // Might perform better than malloc
W = (real*)malloc(2 * vocab_size * (vector_size + 1) * sizeof(real));
if (W == NULL) {
fprintf(stderr, "Error allocating memory for W\n");
exit(1);
}
a = posix_memalign((void **)&gradsq, 128, 2 * vocab_size * (vector_size + 1) * sizeof(real)); // Might perform better than malloc
gradsq = (real*)malloc(2 * vocab_size * (vector_size + 1) * sizeof(real));
if (gradsq == NULL) {
fprintf(stderr, "Error allocating memory for gradsq\n");
exit(1);
Expand Down Expand Up @@ -127,7 +135,7 @@ void *glove_thread(void *vid) {
}

fclose(fin);
pthread_exit(NULL);
return NULL;
}

/* Save params to file */
Expand Down Expand Up @@ -245,7 +253,7 @@ int train_glove() {
if (verbose > 0) fprintf(stderr,"vocab size: %lld\n", vocab_size);
if (verbose > 0) fprintf(stderr,"x_max: %lf\n", x_max);
if (verbose > 0) fprintf(stderr,"alpha: %lf\n", alpha);
pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
thread_type *pt = (thread_type *)malloc(num_threads * sizeof(thread_type));
lines_per_thread = (long long *) malloc(num_threads * sizeof(long long));

// Lock-free asynchronous SGD
Expand All @@ -255,8 +263,19 @@ int train_glove() {
lines_per_thread[a] = num_lines / num_threads + num_lines % num_threads;
long long *thread_ids = (long long*)malloc(sizeof(long long) * num_threads);
for (a = 0; a < num_threads; a++) thread_ids[a] = a;
for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, glove_thread, (void *)&thread_ids[a]);
for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
for (a = 0; a < num_threads; a++)
{
#ifndef _MSC_VER
pthread_create(&pt[a], NULL, glove_thread, (void *)&thread_ids[a]);
#else
pt[a] = CreateThread(NULL, 0, glove_thread, &thread_ids[a], 0, NULL);
#endif
}
#ifndef _MSC_VER
for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
#else
WaitForMultipleObjects(num_threads, pt, TRUE, INFINITE);
#endif
for (a = 0; a < num_threads; a++) total_cost += cost[a];
fprintf(stderr,"iter: %03d, cost: %lf\n", b+1, total_cost/num_lines);
}
Expand Down