Confirmed users
507
edits
(21 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
= traceview = | |||
Java code can be effectively profiled with traceview, included in the Android SDK. See http://developer.android.com/guide/developing/tools/traceview.html | |||
* The Fennec build needs to be "[http://developer.android.com/guide/topics/manifest/application-element.html#debug debuggable]". Nightlies are not, but local developer builds are. | |||
* For profiling pageload, you can generally use [http://developer.android.com/tools/debugging/debugging-tracing.html Monitor/DDMS]. Just press the "start method tracing" button, load a page, wait, press the "stop method tracing" button. | |||
* For profiling startup time, you really need to add Debug.startMethodTracing(...) and Debug.stopMethodTracing(). [http://people.mozilla.org/~mfinkle/fennec/profiles/startup-tracing.patch Sample patch] | |||
* Or use the "am start" -P option, e.g. | |||
adb shell am start -n org.mozilla.fennec_$USER/.App -S -P /data/local/tmp/fennec.trace | |||
# wait for Fennec launch | |||
adb pull /data/local/tmp/fennec.trace | |||
# in Monitor, File > Open File > fennec.trace | |||
= Built-in Profiler = | |||
See https://developer.mozilla.org/en/Performance/Profiling_with_the_Built-in_Profiler#Profiling_Firefox_mobile | |||
= oprofile = | = oprofile = | ||
Line 59: | Line 76: | ||
static void armv7_pmnc_stop(void) | static void armv7_pmnc_stop(void) | ||
If you will be getting callgraphs, apply the patch from {{bug|674986}} | |||
Set the toolchain and compile: (the original kernel is built with 4.4.3 but NDK has 4.4.0; not sure if this matters...) | Set the toolchain and compile: (the original kernel is built with 4.4.3 but NDK has 4.4.0; not sure if this matters...) | ||
Line 139: | Line 158: | ||
echo "#! /bin/bash | echo "#! /bin/bash | ||
rm - | rm -rf oprofile | ||
mkdir oprofile | mkdir oprofile | ||
adb pull /data/oprofile oprofile/" > oppull | adb pull /data/oprofile oprofile/" > oppull | ||
Line 146: | Line 165: | ||
adb shell mount -o remount,ro /system | adb shell mount -o remount,ro /system | ||
The sample rate is (CPU frequency in Hz) / (CPU_CYCLES count) / 64. For the HTC G2, it's 800MHz / | The sample rate is (CPU frequency in Hz) / (CPU_CYCLES count) / 64. For the HTC G2, it's 800MHz / 125000 / 64 = 100 samples per second. | ||
== Using ''oprofile'' on CyanogenMod 7 == | == Using ''oprofile'' on CyanogenMod 7 == | ||
Line 367: | Line 386: | ||
This needs to be investigated, but it looks like ''oprofile'' compiled into the kernel does not have proper support for Nexus One hardware, and has to be fixed. | This needs to be investigated, but it looks like ''oprofile'' compiled into the kernel does not have proper support for Nexus One hardware, and has to be fixed. | ||
I encountered the same behavior when running opcontrol on the Galaxy Nexus and found that defining CONFIG_HW_PERF_EVENTS in the kernel .config file fixed the problem on the Galaxy Nexus. | |||
== Using oprofile on Galaxy Nexus == | |||
See also {{bug|730900}}. | |||
There is no oprofile support in the factory / production Android build on the Galaxy Nexus. The engineering build (i.e. the full_maguro-eng configuration) includes user-mode support for oprofile (opcontrol, etc.), but no kernel support for oprofile (/dev/oprofilefs is not present). | |||
The approach used here is to build a custom kernel with oprofile support configured, patch the Android source tree with the new kernel and then build and flash an engineering build of Android. | |||
Full instructions for building Android begin at http://source.android.com/source/initializing.html. | |||
Here is the short version reflecting what I did, step-by-step: | |||
<nowiki> | |||
# Get Android sources, initialize environment | |||
mkdir <myandroid> | |||
cd <myandroid> | |||
curl https://dl-ssl.google.com/dl/googlesource/git-repo/repo > repo | |||
chmod +x repo | |||
./repo init -u https://android.googlesource.com/platform/manifest -b android-4.0.1_r1.2 | |||
./repo sync | |||
source build/envsetup.sh | |||
lunch full_maguro-eng | |||
<get the ITL41F binaries from http://code.google.com/android/nexus/drivers.html#maguroitl41f and extract them> | |||
# Get kernel source, customize for oprofile, build kernel | |||
mkdir <mykernel> | |||
cd <mykernel> | |||
git clone https://android.googlesource.com/kernel/omap.git | |||
cd omap | |||
git branch -a | |||
git checkout -t remotes/origin/android-omap-tuna-3.0-mr0 | |||
export ARCH=arm | |||
export CROSS_COMPILE=<myandroid>/prebuilt/linux-x86/toolchain/arm-eabi-4.4.3/bin/arm-eabi- | |||
make tuna_defconfig | |||
<edit .config, setting: | |||
CONFIG_PROFILING=y | |||
CONFIG_OPROFILE=y | |||
CONFIG_HAVE_OPROFILE=y | |||
CONFIG_HW_PERF_EVENTS=y | |||
CONFIG_PERF_EVENTS=y | |||
CONFIG_PERF_COUNTERS=y | |||
/edit> | |||
make | |||
</nowiki> | |||
The opcontrol project is in <myandroid>/external/oprofile/opcontrol; it includes special support for ARM_V7_A, but there is a bug in the makefile, Android.mk: -DWITH_ARM_V7_A is added to LOCAL_CFLAGS, but then LOCAL_CFLAGS is clobbered further down -- fix this with something like: | |||
<nowiki> | |||
LOCAL_C_INCLUDES := $(common_target_c_includes) | |||
LOCAL_CFLAGS := $(common_target_cflags) | |||
# Force ARM_V7_A | |||
LOCAL_CFLAGS += -DWITH_ARM_V7_A | |||
</nowiki> | |||
<nowiki> | |||
# Patch Android with my kernel, build, install images | |||
cp arch/arm/boot/zImage <myandroid>/device/samsung/tuna/kernel | |||
cd <myandroid> | |||
make | |||
<put phone into bootloader mode: power off, then hold volume up + volume down + power> | |||
fastboot -w flashall | |||
</nowiki> | |||
Once the phone reboots, verify the new kernel: | |||
<nowiki> | |||
mylinux$ adb shell dmesg | grep oprofile | |||
<6>[ 2.772705] oprofile: using arm/armv7-ca9 | |||
</nowiki> | |||
Now everything appears to be working, but I cannot seem to collect samples! | |||
<nowiki> | |||
root@android:/data/local # opcontrol --reset --kernel-range=0xc004d000,0xc0752000 --event=CPU_CYCLES:1 --setup --status --verbose-log=all --verbose | |||
list_events = 0 | |||
setup = 1 | |||
Configure /dev/oprofile/0/user (1) | |||
Configure /dev/oprofile/0/kernel (1) | |||
Configure /dev/oprofile/0/unit_mask (0) | |||
Configure /dev/oprofile/0/enabled (1) | |||
Configure /dev/oprofile/0/count (1) | |||
Configure /dev/oprofile/0/event (255) | |||
Configure /dev/oprofile/1/enabled (0) | |||
Configure /dev/oprofile/2/enabled (0) | |||
Configure /dev/oprofile/3/enabled (0) | |||
Configure /dev/oprofile/4/enabled (0) | |||
Starting oprofiled... | |||
command: oprofiled --session-dir=/data/oprofile --events=CPU_CYCLES:255:0:1:0:1:1 --no-vmlinux -r 0xc004d000,0xc0752000 --verbose=all | |||
Events: CPU_CYCLES:255:0:1:0:1:1 | |||
Using 2.6+ OProfile kernel interface. | |||
Using log file /data/oprofile/samples/oprofiled.log | |||
Ready | |||
Driver directory: /dev/oprofile | |||
Session directory: /data/oprofile | |||
Counter 0: | |||
name: CPU_CYCLES | |||
count: 1 | |||
Counter 1 disabled | |||
Counter 2 disabled | |||
Counter 3 disabled | |||
Counter 4 disabled | |||
oprofiled pid: 1528 | |||
profiler is not running | |||
cpu1 0 samples received | |||
cpu1 0 samples lost overflow | |||
cpu1 0 samples invalid eip | |||
cpu1 0 backtrace aborted | |||
cpu0 0 samples received | |||
cpu0 0 samples lost overflow | |||
cpu0 0 samples invalid eip | |||
cpu0 0 backtrace aborted | |||
root@android:/data/local # | |||
root@android:/data/local # opcontrol --start | |||
root@android:/data/local # opcontrol --status --verbose | |||
list_events = 0 | |||
setup = 0 | |||
Driver directory: /dev/oprofile | |||
Session directory: /data/oprofile | |||
Counter 0: | |||
name: CPU_CYCLES | |||
count: 1 | |||
Counter 1 disabled | |||
Counter 2 disabled | |||
Counter 3 disabled | |||
Counter 4 disabled | |||
oprofiled pid: 1528 | |||
profiler is running | |||
cpu1 0 samples received | |||
cpu1 0 samples lost overflow | |||
cpu1 0 samples invalid eip | |||
cpu1 0 backtrace aborted | |||
cpu0 0 samples received | |||
cpu0 0 samples lost overflow | |||
cpu0 0 samples invalid eip | |||
cpu0 0 backtrace aborted | |||
root@android:/data/local # opcontrol --stop | |||
root@android:/data/local # ls -lR /data/oprofile | |||
/data/oprofile: | |||
-rw-rw-rw- root root 724 2012-03-07 16:43 abi | |||
-rw-rw-rw- root root 2 2012-03-07 16:46 complete_dump | |||
-rw-rw-rw- root root 4 2012-03-07 16:43 lock | |||
prw-rw-rw- root root 2012-03-07 16:43 opd_pipe | |||
drwxr-xr-x root root 2012-03-07 16:43 samples | |||
/data/oprofile/samples: | |||
-rw-r--r-- root root 57424 2012-03-07 16:46 oprofiled.log | |||
root@android:/data/local # tail /data/oprofile/samples/oprofiled.log | |||
... | |||
CPU_SWITCH to 1 | |||
CPU_SWITCH to 0 | |||
CPU_SWITCH to 1 | |||
CPU_SWITCH to 0 | |||
CPU_SWITCH to 1 | |||
</nowiki> | |||
== Using ''oprofile'' == | == Using ''oprofile'' == | ||
Line 420: | Line 597: | ||
Patch PowerTOP using this patch: | Patch PowerTOP using this patch: | ||
'''TODO''': Investigate other changes to PowerTOP source to make it run better on Android | |||
diff --git a/powertop-1.13/Makefile b/powertop-1.13/Makefile | diff --git a/powertop-1.13/Makefile b/powertop-1.13/Makefile | ||
Line 613: | Line 792: | ||
adb shell | adb shell | ||
TERMINFO=/system/etc/terminfo /data/powertop | TERMINFO=/system/etc/terminfo /data/powertop | ||
= systemtap = | |||
== Compiling systemtap == | |||
Think of the children, get the [http://www.jnchen.com/_media/projects/mozilla/moz-systemtap-1.5.tar.bz2 prebuilt version]. Extract it to your host machine, and to /data on your device. | |||
Running systemtap consists of a host portion and a target portion. For the host portion you use a standard systemtap installation to cross-compile your script into a kernel module. For the target portion you transfer the kernel module to your device and run it on the device. | |||
The host portion you can get through apt-get. The target portion you have to compile (use the [http://www.codesourcery.com/sgpp/lite/arm/portal/subscription?@template=lite CodeSourcery ARM GNU/Linux toolchain]): | |||
LDFLAGS=-static \ | |||
ac_cv_file__usr_include_avahi_common=no \ | |||
ac_cv_file__usr_include_avahi_client=no \ | |||
ac_cv_file__usr_include_nspr=no \ | |||
ac_cv_file__usr_include_nspr4=no \ | |||
ac_cv_file__usr_include_nss=no \ | |||
ac_cv_file__usr_include_nss3=no \ | |||
./configure --prefix=/data/systemtap --host=arm-none-linux-gnueabi \ | |||
--disable-translator --disable-nls | |||
make install | |||
Note that the prefix has to be /data/systemtap (or another path that exists on the Android file system), so you need to create a /data directory on your host machine before you run make install. | |||
You might need libelf. Get the latest elfutils release, then compile: | |||
./configure --host=arm-none-linux-gnueabi --disable-nls \ | |||
--prefix=/PATH/TO/TOOLCHAIN/arm-none-linux-gnueabi/libc/usr | |||
cd libelf | |||
make install | |||
Where /PATH/TO/TOOLCHAIN is the path to your CodeSourcery toolchain. | |||
== Using systemtap == | |||
Before you start, you need the source of your device's kernel. See the oprofile section for one example way to get the kernel source. | |||
To compile your script on the host, make sure you have the NDK toolchain (r5 or above), then run | |||
stap -gv -a arm -B CROSS_COMPILE=arm-linux-androideabi- \ | |||
-r /PATH/TO/KERNEL -R /PATH/TO/SYSTEMTAP/RUNTIME \ | |||
-m NAME -B CFLAGS_MODULE="-DMODULE -fno-pic" NAME.stp | |||
where NAME is the name of your script, /PATH/TO/KERNEL is the path to your kernel source, and /PATH/TO/SYSTEMTAP/RUNTIME is the path to your target systemtap runtime (for example if you're using the prebuilt version above, /PATH/TO/SYSTEMTAP/RUNTIME = systemtap/share/systemtap/runtime) | |||
adb push your kernel module to your device, then run | |||
staprun -x PID NAME.ko | |||
where PID is the process ID of the Fennec process. | |||
== Sample systemtap scripts == | |||
=== glandium's I/O tracking script === | |||
// Logs do_mpage_readpage() calls made while a thread of the target process is | |||
// inside __do_page_cache_readahead(). Each output line is: | |||
//   <gettimeofday_us() timestamp> <file path> <$page->index * 4096> | |||
// pid of the process being traced; set once in probe begin from target(). | |||
global targetpid; | |||
// file_path[tid]: path of the file a thread is currently reading ahead. | |||
global file_path; | |||
probe begin { | |||
// NOTE(review): target() is presumably the pid given to "staprun -x PID". | |||
targetpid = target(); | |||
} | |||
// Readahead entry: remember which file this thread is reading. | |||
probe kernel.function("__do_page_cache_readahead") { | |||
if (targetpid == pid()) | |||
file_path[tid()] = d_path(&$filp->f_path); | |||
} | |||
// Only report page reads for threads that have a path recorded above. | |||
probe kernel.function("do_mpage_readpage") { | |||
if (targetpid == pid() && (tid() in file_path)) { | |||
now = gettimeofday_us(); | |||
// The third field multiplies the page index by 4096 -- assumes 4096-byte | |||
// pages; TODO confirm for the target kernel. | |||
printf("%d %s %d\n", now, file_path[tid()], $page->index*4096); | |||
} | |||
} | |||
// Readahead exit: forget the path so later page reads are not attributed to it. | |||
probe kernel.function("__do_page_cache_readahead").return { | |||
if (targetpid == pid()) | |||
delete file_path[tid()]; | |||
} | |||
=== jchen's I/O tracking script === | |||
Note that this script accesses the hardware timer directly to overcome limitations in the systemtap gettimeofday() resolution (in systemtap, gettimeofday() is limited to jiffy resolution). | |||
This also means that, to run it on a processor other than an MSM7x30, get_rawtime() will need to change. | |||
// Embedded C: register addresses used by get_rawtime() below. | |||
// MSM_GPT_BASE is defined here but not referenced anywhere in this script. | |||
%{ | |||
#include <arch/arm/include/asm/io.h> | |||
#define MSM_CSR_BASE ((void*)0xF8001000) | |||
#define MSM_TMR_BASE MSM_CSR_BASE | |||
#define MSM_GPT_BASE (MSM_TMR_BASE + 0x04) | |||
#define MSM_DGT_BASE (MSM_TMR_BASE + 0x24) | |||
#define TIMER_COUNT_VAL 0x0004 | |||
%} | |||
// Returns the value read with readl() from MSM_DGT_BASE + TIMER_COUNT_VAL, | |||
// cast to int32_t and widened to int64_t, minus global.s_origin_time. | |||
// NOTE(review): global.s_origin_time is presumably the generated-C name of the | |||
// script global origin_time, which probe begin sets from this function -- confirm | |||
// against the systemtap version in use. | |||
// NOTE(review): because of the int32_t cast, counter values with the top bit | |||
// set come out negative -- confirm this is intended. | |||
function get_rawtime:long () %{ /* pure */ /* unprivileged */ | |||
cycles_t c = readl(MSM_DGT_BASE + TIMER_COUNT_VAL); | |||
THIS->__retvalue = ((int64_t) (int32_t) c) - global.s_origin_time; | |||
%} | |||
// parentid / childid: pids cached by is_parent() / is_child(); 0 means "not found yet". | |||
global parentid, childid; | |||
// start_time[thdid]: get_rawtime() at the latest vfs.read / filemap_fault entry. | |||
// total_time[thdid, name, start / 8192]: accumulated time; dumped by probe end. | |||
global start_time, total_time; | |||
// origin_time: get_rawtime() value captured when the script starts. | |||
global origin_time; | |||
// creation_time[thdid]: get_rawtime() recorded at fork/execve return. | |||
global creation_time; | |||
// Reset the cached pids and capture the starting timer value. | |||
probe begin { | |||
parentid = 0; | |||
childid = 0; | |||
origin_time = get_rawtime() | |||
} | |||
// Returns parentid (non-zero) if the current process is the parent process, | |||
// otherwise 0. While parentid is still 0, the first process seen whose execname() | |||
// contains "fennec" is latched as the parent. | |||
function is_parent:long () { | |||
if (pid() == parentid) | |||
return parentid; | |||
if (0 == parentid && isinstr(execname(), "fennec")) { | |||
parentid = pid(); | |||
return parentid; | |||
} | |||
return 0; | |||
} | |||
// Returns childid (non-zero) if the current process is the child process, | |||
// otherwise 0. While childid is still 0, the first process seen whose execname() | |||
// contains "plugin" is latched as the child. | |||
function is_child:long () { | |||
if (pid() == childid) | |||
return childid; | |||
if (0 == childid && isinstr(execname(), "plugin")) { | |||
childid = pid(); | |||
return childid; | |||
} | |||
return 0; | |||
} | |||
// When fork returns non-zero in the parent or child process, record the creation | |||
// time of the id it returned. | |||
probe syscall.fork.return { | |||
if (is_parent() || is_child()) { | |||
if ($return != 0) { | |||
// Key is fork's return value; negated when the caller is the child process so | |||
// ids from the two processes stay distinct. | |||
thdid = is_parent() ? $return : -$return; | |||
creation_time[thdid] = get_rawtime(); | |||
// -1 is a placeholder under the "" name; probe syscall.exit writes the | |||
// elapsed time to the same key. | |||
total_time[thdid, "", creation_time[thdid] / 8192] = -1; | |||
} | |||
} | |||
} | |||
// On execve return in the parent or child process, (re)record the creation time | |||
// of the current thread. | |||
probe syscall.execve.return { | |||
if (is_parent() || is_child()) { | |||
// Key is tid(), negated for the child process. | |||
thdid = is_parent() ? tid() : -tid(); | |||
creation_time[thdid] = get_rawtime(); | |||
// -1 is a placeholder under the "" name; probe syscall.exit writes the | |||
// elapsed time to the same key. | |||
total_time[thdid, "", creation_time[thdid] / 8192] = -1; | |||
} | |||
} | |||
// On exit in the parent or child process, store the time elapsed since the | |||
// recorded creation_time under the "" name in total_time. | |||
probe syscall.exit { | |||
if (is_parent() || is_child()) { | |||
thdid = is_parent() ? tid() : -tid(); | |||
// If no creation_time was recorded for this thdid, this reads an unset entry | |||
// -- presumably 0; verify. | |||
total_time[thdid, "", creation_time[thdid] / 8192] = | |||
get_rawtime() - creation_time[thdid]; | |||
} | |||
} | |||
// Entry of a read with bytes_to_read > 0 in the parent or child process: | |||
// remember when it started, keyed by thread (tid negated for the child process). | |||
probe vfs.read { | |||
if (bytes_to_read > 0 && (is_parent() || is_child())) { | |||
thdid = is_parent() ? tid() : -tid(); | |||
start_time[thdid] = get_rawtime(); | |||
} | |||
} | |||
// Return of the read: add the elapsed time to total_time under "rd:<filename>", | |||
// in the slot given by the start timestamp divided by 8192. | |||
probe vfs.read.return { | |||
if (bytes_to_read > 0 && (is_parent() || is_child())) { | |||
thdid = is_parent() ? tid() : -tid(); | |||
fn = "rd:" . __file_filename(file); | |||
// start_time is shared with the filemap_fault probes and is never deleted. | |||
st = start_time[thdid]; | |||
total_time[thdid, fn, st / 8192] += get_rawtime() - st; | |||
} | |||
} | |||
// Entry of filemap_fault in the parent or child process: remember when it | |||
// started, keyed by thread (tid negated for the child process). | |||
probe kernel.function("filemap_fault") { | |||
if (is_parent() || is_child()) { | |||
thdid = is_parent() ? tid() : -tid(); | |||
start_time[thdid] = get_rawtime(); | |||
} | |||
} | |||
// Return of filemap_fault: add the elapsed time to total_time under | |||
// "mm:<filename of $vma->vm_file>", in the slot given by the start timestamp | |||
// divided by 8192. | |||
probe kernel.function("filemap_fault").return { | |||
if (is_parent() || is_child()) { | |||
thdid = is_parent() ? tid() : -tid(); | |||
fn = "mm:" . __file_filename($vma->vm_file); | |||
// start_time is shared with the vfs.read probes and is never deleted. | |||
st = start_time[thdid]; | |||
total_time[thdid, fn, st / 8192] += get_rawtime() - st; | |||
} | |||
} | |||
// When the script ends, dump every total_time entry as one CSV-style line: | |||
//   <thdid>, <name>, <start / 8192>, <accumulated value> | |||
// <name> is "" for lifetime entries, "rd:..." for reads, "mm:..." for filemap faults. | |||
probe end { | |||
foreach ([t, fn, st] in total_time) { | |||
printf("%d, %s, %d, %d\n", t, fn, st, total_time[t, fn, st]); | |||
} | |||
} | |||