summaryrefslogtreecommitdiff
path: root/trace/simple.c
blob: ac499edee0d5fb7b19c700525b9b524ca275f3bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
/*
 * Simple trace backend
 *
 * Copyright IBM, Corp. 2010
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#ifndef _WIN32
#include <pthread.h>
#endif
#include "qemu/timer.h"
#include "trace/control.h"
#include "trace/simple.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"

/** Trace file header event ID, picked to avoid conflict with real event IDs */
#define HEADER_EVENT_ID (~(uint64_t)0)

/** Trace file magic number */
#define HEADER_MAGIC 0xf2b177cb0aa429b4ULL

/** Trace file version number, bump if format changes */
#define HEADER_VERSION 4

/** Records were dropped event ID */
#define DROPPED_EVENT_ID (~(uint64_t)0 - 1)

/** Trace record is valid */
#define TRACE_RECORD_VALID ((uint64_t)1 << 63)

/*
 * Trace records are written out by a dedicated thread.  The thread waits for
 * records to become available, writes them out, and then waits again.
 */
static GMutex trace_lock;
static GCond trace_available_cond;
static GCond trace_empty_cond;

static bool trace_available;
static bool trace_writeout_enabled;

enum {
    TRACE_BUF_LEN = 4096 * 64,
    TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4,
};

uint8_t trace_buf[TRACE_BUF_LEN];
static volatile gint trace_idx;
static unsigned int writeout_idx;
static volatile gint dropped_events;
static uint32_t trace_pid;
static FILE *trace_fp;
static char *trace_file_name;

#define TRACE_RECORD_TYPE_MAPPING 0
#define TRACE_RECORD_TYPE_EVENT   1

/* * Trace buffer entry */
typedef struct {
    uint64_t event; /* event ID value */
    uint64_t timestamp_ns;
    uint32_t length;   /*    in bytes */
    uint32_t pid;
    uint64_t arguments[];
} TraceRecord;

typedef struct {
    uint64_t header_event_id; /* HEADER_EVENT_ID */
    uint64_t header_magic;    /* HEADER_MAGIC    */
    uint64_t header_version;  /* HEADER_VERSION  */
} TraceLogHeader;


static void read_from_buffer(unsigned int idx, void *dataptr, size_t size);
static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size);

static void clear_buffer_range(unsigned int idx, size_t len)
{
    uint32_t num = 0;
    while (num < len) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        trace_buf[idx++] = 0;
        num++;
    }
}
/**
 * Read a trace record from the trace buffer
 *
 * @idx         Trace buffer index
 * @record      Trace record to fill
 *
 * Returns false if the record is not valid.
 */
static bool get_trace_record(unsigned int idx, TraceRecord **recordptr)
{
    uint64_t event_flag = 0;
    TraceRecord record;
    /* read the event flag to see if its a valid record */
    read_from_buffer(idx, &record, sizeof(event_flag));

    if (!(record.event & TRACE_RECORD_VALID)) {
        return false;
    }

    smp_rmb(); /* read memory barrier before accessing record */
    /* read the record header to know record length */
    read_from_buffer(idx, &record, sizeof(TraceRecord));
    *recordptr = malloc(record.length); /* don't use g_malloc, can deadlock when traced */
    /* make a copy of record to avoid being overwritten */
    read_from_buffer(idx, *recordptr, record.length);
    smp_rmb(); /* memory barrier before clearing valid flag */
    (*recordptr)->event &= ~TRACE_RECORD_VALID;
    /* clear the trace buffer range for consumed record otherwise any byte
     * with its MSB set may be considered as a valid event id when the writer
     * thread crosses this range of buffer again.
     */
    clear_buffer_range(idx, record.length);
    return true;
}

/**
 * Kick writeout thread
 *
 * @wait        Whether to wait for writeout thread to complete
 */
static void flush_trace_file(bool wait)
{
    g_mutex_lock(&trace_lock);
    trace_available = true;
    g_cond_signal(&trace_available_cond);

    if (wait) {
        g_cond_wait(&trace_empty_cond, &trace_lock);
    }

    g_mutex_unlock(&trace_lock);
}

static void wait_for_trace_records_available(void)
{
    g_mutex_lock(&trace_lock);
    while (!(trace_available && trace_writeout_enabled)) {
        g_cond_signal(&trace_empty_cond);
        g_cond_wait(&trace_available_cond, &trace_lock);
    }
    trace_available = false;
    g_mutex_unlock(&trace_lock);
}

static gpointer writeout_thread(gpointer opaque)
{
    TraceRecord *recordptr;
    union {
        TraceRecord rec;
        uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)];
    } dropped;
    unsigned int idx = 0;
    int dropped_count;
    size_t unused __attribute__ ((unused));
    uint64_t type = TRACE_RECORD_TYPE_EVENT;

    for (;;) {
        wait_for_trace_records_available();

        if (g_atomic_int_get(&dropped_events)) {
            dropped.rec.event = DROPPED_EVENT_ID;
            dropped.rec.timestamp_ns = get_clock();
            dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t);
            dropped.rec.pid = trace_pid;
            do {
                dropped_count = g_atomic_int_get(&dropped_events);
            } while (!g_atomic_int_compare_and_exchange(&dropped_events,
                                                        dropped_count, 0));
            dropped.rec.arguments[0] = dropped_count;
            unused = fwrite(&type, sizeof(type), 1, trace_fp);
            unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp);
        }

        while (get_trace_record(idx, &recordptr)) {
            unused = fwrite(&type, sizeof(type), 1, trace_fp);
            unused = fwrite(recordptr, recordptr->length, 1, trace_fp);
            writeout_idx += recordptr->length;
            free(recordptr); /* don't use g_free, can deadlock when traced */
            idx = writeout_idx % TRACE_BUF_LEN;
        }

        fflush(trace_fp);
    }
    return NULL;
}

void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val)
{
    rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t));
}

void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen)
{
    /* Write string length first */
    rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen));
    /* Write actual string now */
    rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen);
}

int trace_record_start(TraceBufferRecord *rec, uint32_t event, size_t datasize)
{
    unsigned int idx, rec_off, old_idx, new_idx;
    uint32_t rec_len = sizeof(TraceRecord) + datasize;
    uint64_t event_u64 = event;
    uint64_t timestamp_ns = get_clock();

    do {
        old_idx = g_atomic_int_get(&trace_idx);
        smp_rmb();
        new_idx = old_idx + rec_len;

        if (new_idx - writeout_idx > TRACE_BUF_LEN) {
            /* Trace Buffer Full, Event dropped ! */
            g_atomic_int_inc(&dropped_events);
            return -ENOSPC;
        }
    } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx));

    idx = old_idx % TRACE_BUF_LEN;

    rec_off = idx;
    rec_off = write_to_buffer(rec_off, &event_u64, sizeof(event_u64));
    rec_off = write_to_buffer(rec_off, &timestamp_ns, sizeof(timestamp_ns));
    rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len));
    rec_off = write_to_buffer(rec_off, &trace_pid, sizeof(trace_pid));

    rec->tbuf_idx = idx;
    rec->rec_off  = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN;
    return 0;
}

static void read_from_buffer(unsigned int idx, void *dataptr, size_t size)
{
    uint8_t *data_ptr = dataptr;
    uint32_t x = 0;
    while (x < size) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        data_ptr[x++] = trace_buf[idx++];
    }
}

static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size)
{
    uint8_t *data_ptr = dataptr;
    uint32_t x = 0;
    while (x < size) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        trace_buf[idx++] = data_ptr[x++];
    }
    return idx; /* most callers wants to know where to write next */
}

void trace_record_finish(TraceBufferRecord *rec)
{
    TraceRecord record;
    read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));
    smp_wmb(); /* write barrier before marking as valid */
    record.event |= TRACE_RECORD_VALID;
    write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));

    if (((unsigned int)g_atomic_int_get(&trace_idx) - writeout_idx)
        > TRACE_BUF_FLUSH_THRESHOLD) {
        flush_trace_file(false);
    }
}

static int st_write_event_mapping(TraceEventIter *iter)
{
    uint64_t type = TRACE_RECORD_TYPE_MAPPING;
    TraceEvent *ev;

    while ((ev = trace_event_iter_next(iter)) != NULL) {
        uint64_t id = trace_event_get_id(ev);
        const char *name = trace_event_get_name(ev);
        uint32_t len = strlen(name);
        if (fwrite(&type, sizeof(type), 1, trace_fp) != 1 ||
            fwrite(&id, sizeof(id), 1, trace_fp) != 1 ||
            fwrite(&len, sizeof(len), 1, trace_fp) != 1 ||
            fwrite(name, len, 1, trace_fp) != 1) {
            return -1;
        }
    }

    return 0;
}

/**
 * Enable / disable tracing, return whether it was enabled.
 *
 * @enable: enable if %true, else disable.
 */
bool st_set_trace_file_enabled(bool enable)
{
    TraceEventIter iter;
    bool was_enabled = trace_fp;

    if (enable == !!trace_fp) {
        return was_enabled;     /* no change */
    }

    /* Halt trace writeout */
    flush_trace_file(true);
    trace_writeout_enabled = false;
    flush_trace_file(true);

    if (enable) {
        static const TraceLogHeader header = {
            .header_event_id = HEADER_EVENT_ID,
            .header_magic = HEADER_MAGIC,
            /* Older log readers will check for version at next location */
            .header_version = HEADER_VERSION,
        };

        trace_fp = fopen(trace_file_name, "wb");
        if (!trace_fp) {
            return was_enabled;
        }

        trace_event_iter_init_all(&iter);
        if (fwrite(&header, sizeof header, 1, trace_fp) != 1 ||
            st_write_event_mapping(&iter) < 0) {
            fclose(trace_fp);
            trace_fp = NULL;
            return was_enabled;
        }

        /* Resume trace writeout */
        trace_writeout_enabled = true;
        flush_trace_file(false);
    } else {
        fclose(trace_fp);
        trace_fp = NULL;
    }
    return was_enabled;
}

/**
 * Set the name of a trace file
 *
 * @file        The trace file name or NULL for the default name-<pid> set at
 *              config time
 */
void st_set_trace_file(const char *file)
{
    bool saved_enable = st_set_trace_file_enabled(false);

    g_free(trace_file_name);

    if (!file) {
        /* Type cast needed for Windows where getpid() returns an int. */
        trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, (pid_t)getpid());
    } else {
        trace_file_name = g_strdup_printf("%s", file);
    }

    st_set_trace_file_enabled(saved_enable);
}

void st_print_trace_file_status(void)
{
    qemu_printf("Trace file \"%s\" %s.\n",
                trace_file_name, trace_fp ? "on" : "off");
}

void st_flush_trace_buffer(void)
{
    flush_trace_file(true);
}

/* Helper function to create a thread with signals blocked.  Use glib's
 * portable threads since QEMU abstractions cannot be used due to reentrancy in
 * the tracer.  Also note the signal masking on POSIX hosts so that the thread
 * does not steal signals when the rest of the program wants them blocked.
 */
static GThread *trace_thread_create(GThreadFunc fn)
{
    GThread *thread;
#ifndef _WIN32
    sigset_t set, oldset;

    sigfillset(&set);
    pthread_sigmask(SIG_SETMASK, &set, &oldset);
#endif

    thread = g_thread_new("trace-thread", fn, NULL);

#ifndef _WIN32
    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
#endif

    return thread;
}

bool st_init(void)
{
    GThread *thread;

    trace_pid = getpid();

    thread = trace_thread_create(writeout_thread);
    if (!thread) {
        warn_report("unable to initialize simple trace backend");
        return false;
    }

    atexit(st_flush_trace_buffer);
    return true;
}

void st_init_group(size_t group)
{
    TraceEventIter iter;

    if (!trace_writeout_enabled) {
        return;
    }

    trace_event_iter_init_group(&iter, group);
    st_write_event_mapping(&iter);
}